base64/read/decoder.rs

use crate::{decode_config_slice, Config, DecodeError};
use std::io::Read;
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader, base64::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'a, R: 'a + io::Read> {
    config: Config,
    /// Where b64 data is read from
    r: &'a mut R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    // into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; 3],
    // index of start of decoded data
    decoded_offset: usize,
    // length of decoded data
    decoded_len: usize,
    // used to provide accurate offsets in errors
    total_b64_decoded: usize,
}

impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("config", &self.config)
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .finish()
    }
}

impl<'a, R: io::Read> DecoderReader<'a, R> {
    /// Create a new decoder that will read from the provided reader `r`.
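    ///
    /// # Examples
    ///
    /// A minimal sketch (assuming a `Cursor` as the reader and the `STANDARD` config):
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    ///
    /// let mut wrapped_reader = Cursor::new(b"aGVsbG8=");
    /// let mut decoder = base64::read::DecoderReader::new(&mut wrapped_reader, base64::STANDARD);
    ///
    /// let mut decoded = Vec::new();
    /// decoder.read_to_end(&mut decoded).unwrap();
    /// assert_eq!(b"hello", &decoded[..]);
    /// ```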
    pub fn new(r: &'a mut R, config: Config) -> Self {
        DecoderReader {
            config,
            r,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .r
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= num_bytes);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

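        // Decode the requested b64 slice; on error, shift the reported offset by the total amount
        // of b64 already consumed so positions refer to the whole input stream, not this buffer.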
        let decoded = decode_config_slice(
            &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
            self.config,
            &mut buf[..],
        )
        .map_err(|e| match e {
            DecodeError::InvalidByte(offset, byte) => {
                DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
            }
            DecodeError::InvalidLength => DecodeError::InvalidLength,
            DecodeError::InvalidLastSymbol(offset, byte) => {
                DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
            }
        })
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.total_b64_decoded += num_bytes;
        self.b64_offset += num_bytes;
        self.b64_len -= num_bytes;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decoded)
    }
}

impl<'a, R: Read> Read for DecoderReader<'a, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written into `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of `read()` calls to
    /// the delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
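    ///
    /// # Examples
    ///
    /// A sketch of surfacing a decode error (this assumes a `Cursor` holding bytes that are not
    /// valid base64):
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    ///
    /// let mut wrapped_reader = Cursor::new(b"not valid base64!");
    /// let mut decoder = base64::read::DecoderReader::new(&mut wrapped_reader, base64::STANDARD);
    ///
    /// let mut result = Vec::new();
    /// let err = decoder.read_to_end(&mut result).unwrap_err();
    /// assert_eq!(std::io::ErrorKind::InvalidData, err.kind());
    /// ```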
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We should never decode into this buffer unless we can immediately write at least one
        // byte into the provided buf, so its length is only ever 3 for the moment between decoding
        // and copying into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush those rather than pull in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
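            // Refill the b64 buffer until we have at least one complete 4-byte chunk to decode,
            // or the delegate reader runs dry.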
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Work around lack of copy_within, which is only available as of Rust 1.37.
                // Copy any bytes we have to the start of the buffer.
                // We know we have < 1 chunk, so we can use a tiny tmp buffer.
                let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
                memmove_buf[..self.b64_len].copy_from_slice(
                    &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
                );
                self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never pass in an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            };

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, we could have fewer than BASE64_CHUNK_SIZE bytes, in which case
                // we have to assume that these last few tokens are, in fact, valid (i.e. must be
                // 2-4 b64 tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than DECODED_CHUNK_SIZE on the last block due to padding
                debug_assert!(decoded <= DECODED_CHUNK_SIZE);

                self.flush_decoded_buf(buf)
            } else {
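                // Every DECODED_CHUNK_SIZE (3) bytes of output needs BASE64_CHUNK_SIZE (4) bytes
                // of b64 input, so scale the space in buf down to whole decoded chunks and back up
                // to the corresponding number of b64 bytes.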
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % BASE64_CHUNK_SIZE
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}