crux_http/response/
decode.rs

1use http_types::Error;
2
3use std::fmt;
4use std::io;
5
/// An error occurred while decoding a response body to a string.
///
/// The error carries the encoding that was used to attempt to decode the body, and the raw byte
/// contents of the body. This can be used to treat un-decodable bodies specially or to implement a
/// fallback parsing strategy.
///
/// `decode_body` does not return this type directly: it wraps it in a `std::io::Error` of kind
/// `std::io::ErrorKind::InvalidData` before converting that into the function's error type.
#[derive(Clone)]
pub struct DecodeError {
    /// The name of the encoding that was used to try to decode the input.
    pub encoding: String,
    /// The input data as bytes.
    ///
    /// These are the undecoded body bytes, handed back so the caller can retry with another
    /// strategy.
    pub data: Vec<u8>,
}
18
19// Override debug output so you don't get each individual byte in `data` printed out separately,
20// because it can be many megabytes large. The actual content is not that interesting anyways
21// and can be accessed manually if it is required.
22impl fmt::Debug for DecodeError {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        f.debug_struct("DecodeError")
25            .field("encoding", &self.encoding)
26            // Perhaps we can output the first N bytes of the response in the future
27            .field("data", &format!("{} bytes", self.data.len()))
28            .finish()
29    }
30}
31
32impl fmt::Display for DecodeError {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        write!(f, "could not decode body as {}", &self.encoding)
35    }
36}
37
// Empty impl: `DecodeError` relies entirely on the trait's default methods. Implementing the trait
// is what allows the value to be passed to `io::Error::new` as the wrapped error.
impl std::error::Error for DecodeError {}
39
/// Check if an encoding label refers to the UTF-8 encoding.
///
/// Matching follows the label lookup rules of the WHATWG Encoding Standard: leading and trailing
/// ASCII whitespace is ignored, the comparison is ASCII case-insensitive, and every label the
/// standard registers for UTF-8 is accepted (not only the common `utf-8` / `utf8` spellings).
///
/// Returns `true` if `encoding_label` names UTF-8, `false` for any other label, including the
/// empty string.
// Not every feature/target combination calls this helper (the native `encoding` build resolves
// labels through `encoding_rs` instead), so silence the unused-function warning there.
#[allow(dead_code)]
fn is_utf8_encoding(encoding_label: &str) -> bool {
    // The complete set of UTF-8 labels from the WHATWG Encoding Standard.
    const UTF8_LABELS: [&str; 6] = [
        "unicode-1-1-utf-8",
        "unicode11utf8",
        "unicode20utf8",
        "utf-8",
        "utf8",
        "x-unicode20utf8",
    ];

    // `char::is_ascii_whitespace` covers exactly the standard's "ASCII whitespace" set
    // (tab, line feed, form feed, carriage return, space).
    let label = encoding_label.trim_matches(|c: char| c.is_ascii_whitespace());

    UTF8_LABELS
        .iter()
        .any(|known| label.eq_ignore_ascii_case(known))
}
47
48/// Decode a response body as utf-8.
49///
50/// # Errors
51///
52/// If the body cannot be decoded as utf-8, this function returns an `std::io::Error` of kind
53/// `std::io::ErrorKind::InvalidData`, carrying a `DecodeError` struct.
54#[cfg(not(feature = "encoding"))]
55pub fn decode_body(bytes: Vec<u8>, content_encoding: Option<&str>) -> Result<String, Error> {
56    if is_utf8_encoding(content_encoding.unwrap_or("utf-8")) {
57        Ok(String::from_utf8(bytes).map_err(|err| {
58            let err = DecodeError {
59                encoding: "utf-8".to_string(),
60                data: err.into_bytes(),
61            };
62            io::Error::new(io::ErrorKind::InvalidData, err)
63        })?)
64    } else {
65        let err = DecodeError {
66            encoding: "utf-8".to_string(),
67            data: bytes,
68        };
69        Err(io::Error::new(io::ErrorKind::InvalidData, err).into())
70    }
71}
72
/// Decode a response body as the given content type.
///
/// `content_encoding` is looked up as an `encoding_rs` label; `None` is treated as `"utf-8"`.
/// When the decoder can borrow its output straight from the input, the input buffer itself is
/// turned into the returned `String`, so no copy is made.
///
/// # Errors
///
/// If an unsupported encoding is requested, or the body does not conform to the requested
/// encoding, this function returns an `std::io::Error` of kind `std::io::ErrorKind::InvalidData`,
/// carrying a `DecodeError` struct.
#[cfg(all(feature = "encoding", not(target_arch = "wasm32")))]
pub fn decode_body(bytes: Vec<u8>, content_encoding: Option<&str>) -> Result<String, Error> {
    use encoding_rs::Encoding;
    use std::borrow::Cow;

    let label = content_encoding.unwrap_or("utf-8");

    // Unknown label: nothing can be decoded, so hand the body back with the requested name.
    let encoding = match Encoding::for_label(label.as_bytes()) {
        Some(encoding) => encoding,
        None => {
            let unsupported = DecodeError {
                encoding: label.to_string(),
                data: bytes,
            };
            return Err(io::Error::new(io::ErrorKind::InvalidData, unsupported).into());
        }
    };

    let (text, actual_encoding, had_errors) = encoding.decode(&bytes);

    // Malformed input: report the encoding the decoder actually applied, which is the second
    // value returned by `decode` and not necessarily the one looked up from the label.
    if had_errors {
        let malformed = DecodeError {
            encoding: actual_encoding.name().into(),
            data: bytes,
        };
        return Err(io::Error::new(io::ErrorKind::InvalidData, malformed).into());
    }

    let body = match text {
        Cow::Owned(transcoded) => transcoded,
        // SAFETY: if encoding_rs returned a `Cow::Borrowed`, the bytes are guaranteed to be valid
        // UTF-8, by virtue of being UTF-8 or being in the subset of ASCII that is the same in
        // UTF-8, so the original buffer can be reused without re-validation.
        // NOTE(review): the whole buffer is reused here, so a leading UTF-8 byte order mark that
        // the decoder skipped would presumably remain in the returned string -- confirm whether
        // that is intended.
        Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(bytes) },
    };
    Ok(body)
}
113
/// Decode a response body as the given content type.
///
/// `content_encoding` is the encoding label of the body; `None` is treated as `"utf-8"`. UTF-8
/// bodies are validated and converted into a `String` directly from the input buffer. Every other
/// label is handed to `web_sys::TextDecoder`.
///
/// NOTE(review): the decoder is created from the label alone, without any options. If that leaves
/// it in its non-fatal mode, malformed input in a non-UTF-8 encoding would decode with
/// replacement characters instead of producing an error -- confirm whether strict decoding is
/// wanted here.
///
/// # Errors
///
/// If an unsupported encoding is requested, or the body does not conform to the requested
/// encoding, this function returns an `std::io::Error` of kind `std::io::ErrorKind::InvalidData`,
/// carrying a `DecodeError` struct.
#[cfg(all(feature = "encoding", target_arch = "wasm32"))]
pub fn decode_body(mut bytes: Vec<u8>, content_encoding: Option<&str>) -> Result<String, Error> {
    use web_sys::TextDecoder;

    // Encoding names are always valid ASCII, so we can avoid including casing mapping tables
    let content_encoding = content_encoding.unwrap_or("utf-8").to_ascii_lowercase();
    if is_utf8_encoding(&content_encoding) {
        return String::from_utf8(bytes).map_err(|err| {
            // Carry a `DecodeError` (as documented) rather than the bare `FromUtf8Error`;
            // `into_bytes` recovers the original buffer for the caller.
            let err = DecodeError {
                encoding: "utf-8".to_string(),
                data: err.into_bytes(),
            };
            io::Error::new(io::ErrorKind::InvalidData, err).into()
        });
    }

    // Constructing the decoder fails for labels it does not recognise. Surface that as the
    // documented error instead of panicking on an attacker- or server-controlled label.
    let decoder = match TextDecoder::new_with_label(&content_encoding) {
        Ok(decoder) => decoder,
        Err(_) => {
            let err = DecodeError {
                encoding: content_encoding,
                data: bytes,
            };
            return Err(io::Error::new(io::ErrorKind::InvalidData, err).into());
        }
    };

    match decoder.decode_with_u8_array(&mut bytes) {
        Ok(text) => Ok(text),
        Err(_) => {
            let err = DecodeError {
                encoding: content_encoding,
                data: bytes,
            };
            Err(io::Error::new(io::ErrorKind::InvalidData, err).into())
        }
    }
}
144
#[cfg(test)]
mod decode_tests {
    use super::decode_body;

    /// Sample text containing non-ASCII characters, so a successful round trip proves real
    /// utf-8 decoding rather than plain ASCII pass-through.
    const DANISH: &str = "Rød grød med fløde";

    /// An explicit `utf-8` label decodes utf-8 bytes back to the original text.
    #[test]
    fn utf8() {
        let body = DANISH.as_bytes().to_vec();
        let decoded = decode_body(body, Some("utf-8")).unwrap();
        assert_eq!(decoded, DANISH, "Parses utf-8");
    }

    /// A missing label falls back to utf-8.
    #[test]
    fn default_utf8() {
        let body = DANISH.as_bytes().to_vec();
        let decoded = decode_body(body, None).unwrap();
        assert_eq!(decoded, DANISH, "Defaults to utf-8");
    }

    /// EUC-KR input decodes only when the `encoding` feature is enabled; otherwise it must be
    /// rejected because utf-8 is the only supported encoding.
    #[test]
    fn euc_kr() {
        let korean_bytes: Vec<u8> = vec![
            0xb3, 0xbb, 0x20, 0xc7, 0xb0, 0xc0, 0xb8, 0xb7, 0xce, 0x20, 0xb5, 0xb9, 0xbe, 0xc6,
            0xbf, 0xc0, 0xb6, 0xf3, 0x2c, 0x20, 0xb3, 0xbb, 0x20, 0xbe, 0xc8, 0xbf, 0xa1, 0xbc,
            0xad, 0x20, 0xc0, 0xe1, 0xb5, 0xe9, 0xb0, 0xc5, 0xb6, 0xf3,
        ];

        let outcome = decode_body(korean_bytes, Some("euc-kr"));

        if !cfg!(feature = "encoding") {
            assert!(outcome.is_err(), "Only utf-8 is supported");
            return;
        }
        assert_eq!(outcome.unwrap(), "내 품으로 돌아오라, 내 안에서 잠들거라");
    }
}