base64/write/encoder.rs
1use crate::encode::encode_to_slice;
2use crate::{encode_config_slice, Config};
3use std::{
4 cmp, fmt,
5 io::{ErrorKind, Result, Write},
6};
7
/// Size, in bytes, of the fixed buffer that encoded output is staged in before it is handed to
/// the delegate writer.
pub(crate) const BUF_SIZE: usize = 1024;
/// The most input bytes whose encoding will fit in `BUF_SIZE` (every 3 bytes of input become
/// 4 bytes of output).
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
13
/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<W: Write> {
    /// Encoding configuration used for every chunk this writer encodes.
    config: Config,
    /// Where encoded data is written to. It's an `Option` because `finish()` takes the writer out
    /// in order to return it to the caller. `None` implies that `finish()` has been called
    /// successfully.
    delegate: Option<W>,
    /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
    /// with the next `write()`, encode it, then proceed with the rest of the input normally.
    extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
    /// How much of `extra_input` is occupied. Between calls this is in
    /// `[0, MIN_ENCODE_CHUNK_SIZE)`: a complete chunk is encoded right away rather than stored.
    extra_input_occupied_len: usize,
    /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the
    /// underlying writer did not write last time.
    output: [u8; BUF_SIZE],
    /// How much of `output` is occupied with encoded data that couldn't be written last time.
    output_occupied_len: usize,
    /// Panic safety: don't write again in the destructor if the writer panicked while we were
    /// writing to it.
    panicked: bool,
}
75
76impl<W: Write> fmt::Debug for EncoderWriter<W> {
77 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
78 write!(
79 f,
80 "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
81 self.extra_input,
82 self.extra_input_occupied_len,
83 &self.output[0..5],
84 self.output_occupied_len
85 )
86 }
87}
88
89impl<W: Write> EncoderWriter<W> {
90 /// Create a new encoder that will write to the provided delegate writer `w`.
91 pub fn new(w: W, config: Config) -> EncoderWriter<W> {
92 EncoderWriter {
93 config,
94 delegate: Some(w),
95 extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
96 extra_input_occupied_len: 0,
97 output: [0u8; BUF_SIZE],
98 output_occupied_len: 0,
99 panicked: false,
100 }
101 }
102
103 /// Encode all remaining buffered data and write it, including any trailing incomplete input
104 /// triples and associated padding.
105 ///
106 /// Once this succeeds, no further writes or calls to this method are allowed.
107 ///
108 /// This may write to the delegate writer multiple times if the delegate writer does not accept
109 /// all input provided to its `write` each invocation.
110 ///
111 /// If you don't care about error handling, it is not necessary to call this function, as the
112 /// equivalent finalization is done by the Drop impl.
113 ///
114 /// Returns the writer that this was constructed around.
115 ///
116 /// # Errors
117 ///
118 /// The first error that is not of `ErrorKind::Interrupted` will be returned.
119 pub fn finish(&mut self) -> Result<W> {
120 // If we could consume self in finish(), we wouldn't have to worry about this case, but
121 // finish() is retryable in the face of I/O errors, so we can't consume here.
122 if self.delegate.is_none() {
123 panic!("Encoder has already had finish() called")
124 };
125
126 self.write_final_leftovers()?;
127
128 let writer = self.delegate.take().expect("Writer must be present");
129
130 Ok(writer)
131 }
132
133 /// Write any remaining buffered data to the delegate writer.
134 fn write_final_leftovers(&mut self) -> Result<()> {
135 if self.delegate.is_none() {
136 // finish() has already successfully called this, and we are now in drop() with a None
137 // writer, so just no-op
138 return Ok(());
139 }
140
141 self.write_all_encoded_output()?;
142
143 if self.extra_input_occupied_len > 0 {
144 let encoded_len = encode_config_slice(
145 &self.extra_input[..self.extra_input_occupied_len],
146 self.config,
147 &mut self.output[..],
148 );
149
150 self.output_occupied_len = encoded_len;
151
152 self.write_all_encoded_output()?;
153
154 // write succeeded, do not write the encoding of extra again if finish() is retried
155 self.extra_input_occupied_len = 0;
156 }
157
158 Ok(())
159 }
160
161 /// Write as much of the encoded output to the delegate writer as it will accept, and store the
162 /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
163 ///
164 /// # Errors
165 ///
166 /// Errors from the delegate writer are returned. In the case of an error,
167 /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
168 /// that no write took place.
169 fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
170 self.panicked = true;
171 let res = self
172 .delegate
173 .as_mut()
174 .expect("Writer must be present")
175 .write(&self.output[..current_output_len]);
176 self.panicked = false;
177
178 res.map(|consumed| {
179 debug_assert!(consumed <= current_output_len);
180
181 if consumed < current_output_len {
182 self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
183 // If we're blocking on I/O, the minor inefficiency of copying bytes to the
184 // start of the buffer is the least of our concerns...
185 // Rotate moves more than we need to, but copy_within isn't stabilized yet.
186 self.output.rotate_left(consumed);
187 } else {
188 self.output_occupied_len = 0;
189 }
190 })
191 }
192
193 /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
194 ///
195 /// This is basically write_all for the remaining buffered data but without the undesirable
196 /// abort-on-`Ok(0)` behavior.
197 ///
198 /// # Errors
199 ///
200 /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
201 /// `Interrupted`, in which case the error is ignored and writes will continue.
202 fn write_all_encoded_output(&mut self) -> Result<()> {
203 while self.output_occupied_len > 0 {
204 let remaining_len = self.output_occupied_len;
205 match self.write_to_delegate(remaining_len) {
206 // try again on interrupts ala write_all
207 Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
208 // other errors return
209 Err(e) => return Err(e),
210 // success no-ops because remaining length is already updated
211 Ok(_) => {}
212 };
213 }
214
215 debug_assert_eq!(0, self.output_occupied_len);
216 Ok(())
217 }
218}
219
220impl<W: Write> Write for EncoderWriter<W> {
221 /// Encode input and then write to the delegate writer.
222 ///
223 /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
224 /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
225 /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
226 /// <https://github.com/rust-lang/rust/issues/56889> for more on that.
227 ///
228 /// If the previous call to `write` provided more (encoded) data than the delegate writer could
229 /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
230 /// data is present, subsequent calls to `write` will try to write the remaining buffered data
231 /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
232 /// an error.
233 ///
234 /// # Errors
235 ///
236 /// Any errors emitted by the delegate writer are returned.
237 fn write(&mut self, input: &[u8]) -> Result<usize> {
238 if self.delegate.is_none() {
239 panic!("Cannot write more after calling finish()");
240 }
241
242 if input.is_empty() {
243 return Ok(0);
244 }
245
246 // The contract of `Write::write` places some constraints on this implementation:
247 // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
248 // iterate over the input and encode multiple chunks.
249 // - Errors mean that "no bytes were written to this writer", so we need to reset the
250 // internal state to what it was before the error occurred
251
252 // before reading any input, write any leftover encoded output from last time
253 if self.output_occupied_len > 0 {
254 let current_len = self.output_occupied_len;
255 return self
256 .write_to_delegate(current_len)
257 // did not read any input
258 .map(|_| 0);
259 }
260
261 debug_assert_eq!(0, self.output_occupied_len);
262
263 // how many bytes, if any, were read into `extra` to create a triple to encode
264 let mut extra_input_read_len = 0;
265 let mut input = input;
266
267 let orig_extra_len = self.extra_input_occupied_len;
268
269 let mut encoded_size = 0;
270 // always a multiple of MIN_ENCODE_CHUNK_SIZE
271 let mut max_input_len = MAX_INPUT_LEN;
272
273 // process leftover un-encoded input from last write
274 if self.extra_input_occupied_len > 0 {
275 debug_assert!(self.extra_input_occupied_len < 3);
276 if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
277 // Fill up `extra`, encode that into `output`, and consume as much of the rest of
278 // `input` as possible.
279 // We could write just the encoding of `extra` by itself but then we'd have to
280 // return after writing only 4 bytes, which is inefficient if the underlying writer
281 // would make a syscall.
282 extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
283 debug_assert!(extra_input_read_len > 0);
284 // overwrite only bytes that weren't already used. If we need to rollback extra_len
285 // (when the subsequent write errors), the old leading bytes will still be there.
286 self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
287 .copy_from_slice(&input[0..extra_input_read_len]);
288
289 let len = encode_to_slice(
290 &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
291 &mut self.output[..],
292 self.config.char_set.encode_table(),
293 );
294 debug_assert_eq!(4, len);
295
296 input = &input[extra_input_read_len..];
297
298 // consider extra to be used up, since we encoded it
299 self.extra_input_occupied_len = 0;
300 // don't clobber where we just encoded to
301 encoded_size = 4;
302 // and don't read more than can be encoded
303 max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;
304
305 // fall through to normal encoding
306 } else {
307 // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be
308 // 1 byte in each.
309 debug_assert_eq!(1, input.len());
310 debug_assert_eq!(1, self.extra_input_occupied_len);
311
312 self.extra_input[self.extra_input_occupied_len] = input[0];
313 self.extra_input_occupied_len += 1;
314 return Ok(1);
315 };
316 } else if input.len() < MIN_ENCODE_CHUNK_SIZE {
317 // `extra` is empty, and `input` fits inside it
318 self.extra_input[0..input.len()].copy_from_slice(input);
319 self.extra_input_occupied_len = input.len();
320 return Ok(input.len());
321 };
322
323 // either 0 or 1 complete chunks encoded from extra
324 debug_assert!(encoded_size == 0 || encoded_size == 4);
325 debug_assert!(
326 // didn't encode extra input
327 MAX_INPUT_LEN == max_input_len
328 // encoded one triple
329 || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
330 );
331
332 // encode complete triples only
333 let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
334 let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
335 debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
336 debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);
337
338 encoded_size += encode_to_slice(
339 &input[..(input_chunks_to_encode_len)],
340 &mut self.output[encoded_size..],
341 self.config.char_set.encode_table(),
342 );
343
344 // not updating `self.output_occupied_len` here because if the below write fails, it should
345 // "never take place" -- the buffer contents we encoded are ignored and perhaps retried
346 // later, if the consumer chooses.
347
348 self.write_to_delegate(encoded_size)
349 // no matter whether we wrote the full encoded buffer or not, we consumed the same
350 // input
351 .map(|_| extra_input_read_len + input_chunks_to_encode_len)
352 .map_err(|e| {
353 // in case we filled and encoded `extra`, reset extra_len
354 self.extra_input_occupied_len = orig_extra_len;
355
356 e
357 })
358 }
359
360 /// Because this is usually treated as OK to call multiple times, it will *not* flush any
361 /// incomplete chunks of input or write padding.
362 /// # Errors
363 ///
364 /// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
365 fn flush(&mut self) -> Result<()> {
366 self.write_all_encoded_output()?;
367 self.delegate
368 .as_mut()
369 .expect("Writer must be present")
370 .flush()
371 }
372}
373
374impl<W: Write> Drop for EncoderWriter<W> {
375 fn drop(&mut self) {
376 if !self.panicked {
377 // like `BufWriter`, ignore errors during drop
378 let _ = self.write_final_leftovers();
379 }
380 }
381}