base64/
chunked_encoder.rs

1use crate::{
2    encode::{add_padding, encode_to_slice},
3    Config,
4};
5#[cfg(any(feature = "alloc", feature = "std", test))]
6use alloc::string::String;
7use core::cmp;
8#[cfg(any(feature = "alloc", feature = "std", test))]
9use core::str;
10
/// Destination for the base64 output produced by `ChunkedEncoder`.
///
/// `ChunkedEncoder::encode` hands each encoded chunk to the sink in input order, and stops at
/// the first error the sink reports.
pub trait Sink {
    /// Error returned when the sink cannot accept a chunk.
    type Error;

    /// Consume one chunk of encoded base64 output.
    ///
    /// `encoded` contains the base64 characters for the chunk as UTF-8 bytes.
    fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
18
19const BUF_SIZE: usize = 1024;
20
21/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
22pub struct ChunkedEncoder {
23    config: Config,
24    max_input_chunk_len: usize,
25}
26
27impl ChunkedEncoder {
28    pub fn new(config: Config) -> ChunkedEncoder {
29        ChunkedEncoder {
30            config,
31            max_input_chunk_len: max_input_length(BUF_SIZE, config),
32        }
33    }
34
35    pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
36        let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
37        let encode_table = self.config.char_set.encode_table();
38
39        let mut input_index = 0;
40
41        while input_index < bytes.len() {
42            // either the full input chunk size, or it's the last iteration
43            let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
44
45            let chunk = &bytes[input_index..(input_index + input_chunk_len)];
46
47            let mut b64_bytes_written = encode_to_slice(chunk, &mut encode_buf, encode_table);
48
49            input_index += input_chunk_len;
50            let more_input_left = input_index < bytes.len();
51
52            if self.config.pad && !more_input_left {
53                // no more input, add padding if needed. Buffer will have room because
54                // max_input_length leaves room for it.
55                b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
56            }
57
58            sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
59        }
60
61        Ok(())
62    }
63}
64
65/// Calculate the longest input that can be encoded for the given output buffer size.
66///
67/// If the config requires padding, two bytes of buffer space will be set aside so that the last
68/// chunk of input can be encoded safely.
69///
70/// The input length will always be a multiple of 3 so that no encoding state has to be carried over
71/// between chunks.
72fn max_input_length(encoded_buf_len: usize, config: Config) -> usize {
73    let effective_buf_len = if config.pad {
74        // make room for padding
75        encoded_buf_len
76            .checked_sub(2)
77            .expect("Don't use a tiny buffer")
78    } else {
79        encoded_buf_len
80    };
81
82    // No padding, so just normal base64 expansion.
83    (effective_buf_len / 4) * 3
84}
85
// Minimal sink: holds a mutable borrow of a caller-owned `String` that encoded output is
// appended to.
#[cfg(any(feature = "alloc", feature = "std", test))]
pub(crate) struct StringSink<'a> {
    // The string that receives the encoded output.
    string: &'a mut String,
}
91
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> StringSink<'a> {
    // Wrap `s` so that it can be used as a sink; the sink borrows `s` for as long as it lives.
    pub(crate) fn new(s: &mut String) -> StringSink<'_> {
        StringSink { string: s }
    }
}
98
#[cfg(any(feature = "alloc", feature = "std", test))]
impl<'a> Sink for StringSink<'a> {
    type Error = ();

    // Append the chunk to the wrapped string. Never returns `Err`; panics if `s` is not valid
    // UTF-8.
    fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
        let text = str::from_utf8(s).unwrap();
        self.string.push_str(text);

        Ok(())
    }
}
109
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::{encode_config_buf, tests::random_config, CharacterSet, STANDARD};

    use rand::{
        distributions::{Distribution, Uniform},
        FromEntropy, Rng,
    };

    #[test]
    fn chunked_encode_empty() {
        assert_eq!("", chunked_encode_str(&[], STANDARD));
    }

    #[test]
    fn chunked_encode_intermediate_fast_loop() {
        // > 8 bytes input, will enter the pretty fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4",
            chunked_encode_str(b"foobarbazqux", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_fast_loop() {
        // > 32 bytes input, will enter the uber fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
            chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_slow_loop_only() {
        // < 8 bytes input, slow loop only
        assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", STANDARD));
    }

    #[test]
    fn chunked_encode_matches_normal_encode_random_string_sink() {
        let helper = StringSinkTestHelper;
        chunked_encode_matches_normal_encode_random(&helper);
    }

    #[test]
    fn max_input_length_no_pad() {
        let config = config_with_pad(false);
        assert_eq!(768, max_input_length(1024, config));
    }

    #[test]
    fn max_input_length_with_pad_decrements_one_triple() {
        let config = config_with_pad(true);
        assert_eq!(765, max_input_length(1024, config));
    }

    #[test]
    fn max_input_length_with_pad_one_byte_short() {
        let config = config_with_pad(true);
        assert_eq!(765, max_input_length(1025, config));
    }

    #[test]
    fn max_input_length_with_pad_fits_exactly() {
        let config = config_with_pad(true);
        assert_eq!(768, max_input_length(1026, config));
    }

    #[test]
    fn max_input_length_cant_use_extra_single_encoded_byte() {
        // 401 output bytes hold 100 complete groups of 4; the one leftover byte is unusable.
        let config = config_with_pad(false);
        assert_eq!(300, max_input_length(401, config));
    }

    /// Check that chunked encoding through the sink provided by `sink_test_helper` produces
    /// the same output as `encode_config_buf` for many random inputs and random configs.
    ///
    /// Public so that tests for other sink implementations can reuse it.
    pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
        let mut input_buf: Vec<u8> = Vec::new();
        let mut output_buf = String::new();
        let mut rng = rand::rngs::SmallRng::from_entropy();
        let input_len_range = Uniform::new(1, 10_000);

        for _ in 0..5_000 {
            input_buf.clear();
            output_buf.clear();

            let buf_len = input_len_range.sample(&mut rng);
            input_buf.extend((0..buf_len).map(|_| rng.gen::<u8>()));

            let config = random_config(&mut rng);

            let chunk_encoded_string = sink_test_helper.encode_to_string(config, &input_buf);
            encode_config_buf(&input_buf, config, &mut output_buf);

            assert_eq!(
                output_buf, chunk_encoded_string,
                "input len={}, config: pad={}",
                buf_len, config.pad
            );
        }
    }

    /// Encode `bytes` with a `ChunkedEncoder` writing into a `StringSink`.
    fn chunked_encode_str(bytes: &[u8], config: Config) -> String {
        // Same steps as the `StringSink` helper, so reuse it rather than duplicating them.
        StringSinkTestHelper.encode_to_string(config, bytes)
    }

    /// Standard-alphabet config with the given padding setting.
    fn config_with_pad(pad: bool) -> Config {
        Config::new(CharacterSet::Standard, pad)
    }

    /// An abstraction around sinks so that the same tests can easily be applied to any sink
    /// implementation.
    pub trait SinkTestHelper {
        /// Encode `bytes` with `config` through the sink under test and return the result.
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String;
    }

    struct StringSinkTestHelper;

    impl SinkTestHelper for StringSinkTestHelper {
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String {
            let encoder = ChunkedEncoder::new(config);
            let mut s = String::new();
            {
                // Scope the sink so its mutable borrow of `s` ends before `s` is returned.
                let mut sink = StringSink::new(&mut s);
                encoder.encode(bytes, &mut sink).unwrap();
            }

            s
        }
    }
}
247}