base64/read/decoder.rs
use crate::{decode_config_slice, Config, DecodeError};
use std::io::Read;
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader, base64::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'a, R: 'a + io::Read> {
    config: Config,
    /// Where b64 data is read from
    r: &'a mut R,

    // Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    // The start of the pending buffered data in b64_buffer.
    b64_offset: usize,
    // The amount of buffered b64 data.
    b64_len: usize,
    // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    // decoded chunk into, we have to be able to hang on to a few decoded bytes.
    // Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    // decode 3 bytes into and then juggle copying one byte into the provided read buf and the
    // rest into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_buffer: [u8; DECODED_CHUNK_SIZE],
    // Index of the start of decoded data.
    decoded_offset: usize,
    // Length of decoded data.
    decoded_len: usize,
    // Used to provide accurate offsets in errors.
    total_b64_decoded: usize,
}
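
// For reference, the layout of b64_buffer as maintained by read() and read_from_delegate()
// (a descriptive sketch of the fields above, not additional state):
//
//     [ consumed b64 | pending b64 (b64_len bytes) | free space ]
//                     ^ b64_offset                               ^ BUF_SIZE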

impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("config", &self.config)
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_buffer", &self.decoded_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("total_b64_decoded", &self.total_b64_decoded)
            .finish()
    }
}

impl<'a, R: io::Read> DecoderReader<'a, R> {
    /// Create a new decoder that will read from the provided reader `r`.
    pub fn new(r: &'a mut R, config: Config) -> Self {
        DecoderReader {
            config,
            r,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            total_b64_decoded: 0,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .r
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's
    /// the caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= num_bytes);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let decoded = decode_config_slice(
            &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
            self.config,
            &mut buf[..],
        )
        .map_err(|e| match e {
            // Shift error offsets so they are relative to the whole stream, not just this buffer.
            DecodeError::InvalidByte(offset, byte) => {
                DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
            }
            DecodeError::InvalidLength => DecodeError::InvalidLength,
            DecodeError::InvalidLastSymbol(offset, byte) => {
                DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
            }
        })
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.total_b64_decoded += num_bytes;
        self.b64_offset += num_bytes;
        self.b64_len -= num_bytes;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decoded)
    }
}

impl<'a, R: Read> Read for DecoderReader<'a, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written to `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of `read()` calls to
    /// the delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
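    ///
    /// # Examples
    ///
    /// An illustrative sketch (using `Cursor` as the reader, as in the type-level example):
    /// reading into a buffer smaller than a decoded 3-byte chunk is fine; leftover decoded bytes
    /// are held internally and returned by subsequent reads.
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    ///
    /// // "aGVsbG8=" is the standard base64 encoding of "hello"
    /// let mut wrapped_reader = Cursor::new(b"aGVsbG8=");
    /// let mut decoder = base64::read::DecoderReader::new(&mut wrapped_reader, base64::STANDARD);
    ///
    /// let mut buf = [0_u8; 2];
    /// assert_eq!(2, decoder.read(&mut buf).unwrap());
    /// assert_eq!(b"he", &buf);
    /// ```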
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be == DECODED_CHUNK_SIZE when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into here when we can't immediately write at least one byte
        // into the provided buf, so the effective length should only be 3 momentarily, between
        // when we decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // We have a few leftover decoded bytes; flush those rather than pulling in more b64.
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Work around the lack of copy_within(), which is only available from Rust 1.37:
                // copy any bytes we have to the start of the buffer.
                // We know we have < 1 chunk, so we can use a tiny tmp buffer.
                let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
                memmove_buf[..self.b64_len].copy_from_slice(
                    &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
                );
                self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never pass in an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            }

            debug_assert!(if at_eof {
                // if we are at EOF, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one complete chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // The caller requested an annoyingly short read, so we have to write to a tmp
                // buf first to avoid a double mutable borrow.
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // If we are at EOF, we could have fewer than BASE64_CHUNK_SIZE bytes left, in
                // which case we have to assume that these last few tokens are, in fact, valid
                // (i.e. must be 2-4 b64 tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on the last block due to padding
                debug_assert!(decoded <= DECODED_CHUNK_SIZE);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % BASE64_CHUNK_SIZE
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}
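
// A minimal inline test sketch (not exhaustive) covering the two read paths above: a buffer
// smaller than a decoded chunk, and a buffer large enough to take whole chunks. It assumes
// `crate::STANDARD` is exported by the crate root, as the doc examples suggest.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Read};

    #[test]
    fn small_buf_buffers_leftover_decoded_bytes() {
        // "aGVsbG8=" is the standard base64 encoding of "hello"
        let mut wrapped = Cursor::new(b"aGVsbG8=");
        let mut decoder = DecoderReader::new(&mut wrapped, crate::STANDARD);

        // A 2-byte buffer forces the decoder to hold a decoded byte between reads.
        let mut buf = [0_u8; 2];
        assert_eq!(2, decoder.read(&mut buf).unwrap());
        assert_eq!(b"he", &buf);
        // The leftover byte from the first decoded chunk comes back on its own.
        assert_eq!(1, decoder.read(&mut buf).unwrap());
        assert_eq!(b'l', buf[0]);
        assert_eq!(2, decoder.read(&mut buf).unwrap());
        assert_eq!(b"lo", &buf);
        assert_eq!(0, decoder.read(&mut buf).unwrap());
    }

    #[test]
    fn large_buf_decodes_whole_chunks() {
        // "aGVsbG8gd29ybGQ=" is the standard base64 encoding of "hello world"
        let mut wrapped = Cursor::new(b"aGVsbG8gd29ybGQ=");
        let mut decoder = DecoderReader::new(&mut wrapped, crate::STANDARD);

        let mut result = Vec::new();
        decoder.read_to_end(&mut result).unwrap();
        assert_eq!(b"hello world", &result[..]);
    }
}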