use anyhow::Context; use smartstring::{SmartString, LazyCompact}; use super::common::*; use super::utils::*; #[derive(Debug, Clone, PartialEq, Eq)] pub struct AnnounceRequest { pub info_hash: InfoHash, pub peer_id: PeerId, pub port: u16, pub bytes_left: usize, pub event: AnnounceEvent, pub compact: bool, /// Number of response peers wanted pub numwant: Option, pub key: Option>, } impl AnnounceRequest { pub fn as_bytes(&self) -> Vec { let mut bytes = Vec::with_capacity( 24 + 60 + 9 + 60 + 6 + 5 + // high estimate 6 + 2 + // estimate 14 + // FIXME event 9 + 1 + 20 + // numwant bad estimate 20 + // key bad estimate 13 ); bytes.extend_from_slice(b"GET /announce?info_hash="); urlencode_20_bytes(self.info_hash.0, &mut bytes); bytes.extend_from_slice(b"&peer_id="); urlencode_20_bytes(self.info_hash.0, &mut bytes); bytes.extend_from_slice(b"&port="); let _ = itoa::write(&mut bytes, self.port); bytes.extend_from_slice(b"&left="); let _ = itoa::write(&mut bytes, self.bytes_left); bytes.extend_from_slice(b"&event=started"); // FIXME bytes.extend_from_slice(b"&compact="); let _ = itoa::write(&mut bytes, self.compact as u8); if let Some(numwant) = self.numwant { bytes.extend_from_slice(b"&numwant="); let _ = itoa::write(&mut bytes, numwant); } if let Some(ref key) = self.key { bytes.extend_from_slice(b"&key="); bytes.extend_from_slice(key.as_str().as_bytes()); } bytes.extend_from_slice(b" HTTP/1.1\r\n\r\n"); bytes } } #[derive(Debug, Clone, PartialEq, Eq)] pub struct ScrapeRequest { pub info_hashes: Vec, } impl ScrapeRequest { pub fn as_bytes(&self) -> Vec { let mut bytes = Vec::new(); bytes.extend_from_slice(b"GET /scrape?"); let mut first = true; for info_hash in self.info_hashes.iter() { if !first { bytes.push(b'&') } bytes.extend_from_slice(b"info_hash="); urlencode_20_bytes(info_hash.0, &mut bytes); first = false; } bytes.extend_from_slice(b" HTTP/1.1\r\n\r\n"); bytes } } #[derive(Debug)] pub enum RequestParseError { NeedMoreData, Invalid(anyhow::Error), } #[derive(Debug, Clone, PartialEq, Eq)] pub enum Request { Announce(AnnounceRequest), Scrape(ScrapeRequest), } impl Request { /// Parse Request from HTTP request bytes pub fn from_bytes(bytes: &[u8]) -> Result { let mut headers = [httparse::EMPTY_HEADER; 16]; let mut http_request = httparse::Request::new(&mut headers); let path = match http_request.parse(bytes){ Ok(httparse::Status::Complete(_)) => { if let Some(path) = http_request.path { path } else { return Err(RequestParseError::Invalid( anyhow::anyhow!("no http path") )) } }, Ok(httparse::Status::Partial) => { if let Some(path) = http_request.path { path } else { return Err(RequestParseError::NeedMoreData) } } Err(err) => { return Err(RequestParseError::Invalid( anyhow::Error::from(err) )) }, }; Self::from_http_get_path(path).map_err(RequestParseError::Invalid) } /// Parse Request from http path (GET `/announce?info_hash=...`) /// /// Existing serde-url decode crates were insufficient, so the decision was /// made to create a custom parser. serde_urlencoded doesn't support multiple /// values with same key, and serde_qs pulls in lots of dependencies. Both /// would need preprocessing for the binary format used for info_hash and /// peer_id. pub fn from_http_get_path(path: &str) -> anyhow::Result { ::log::debug!("request GET path: {}", path); let mut split_parts= path.splitn(2, '?'); let location = split_parts.next() .with_context(|| "no location")?; let query_string = split_parts.next() .with_context(|| "no query string")?; // -- Parse key-value pairs let mut info_hashes = Vec::new(); let mut opt_peer_id = None; let mut opt_port = None; let mut opt_bytes_left = None; let mut event = AnnounceEvent::default(); let mut opt_numwant = None; let mut opt_key = None; let query_string_bytes = query_string.as_bytes(); let mut ampersand_iter = ::memchr::memchr_iter(b'&', query_string_bytes); let mut position = 0usize; for equal_sign_index in ::memchr::memchr_iter(b'=', query_string_bytes){ let segment_end = ampersand_iter.next() .unwrap_or(query_string.len()); let key = query_string.get(position..equal_sign_index) .with_context(|| format!("no key at {}..{}", position, equal_sign_index))?; let value = query_string.get(equal_sign_index + 1..segment_end) .with_context(|| format!("no value at {}..{}", equal_sign_index + 1, segment_end))?; match key { "info_hash" => { let value = Self::urldecode_20_bytes(value)?; info_hashes.push(InfoHash(value)); }, "peer_id" => { let value = Self::urldecode_20_bytes(value)?; opt_peer_id = Some(PeerId(value)); }, "port" => { opt_port = Some(value.parse::().with_context(|| "parse port")?); }, "left" => { opt_bytes_left = Some(value.parse::().with_context(|| "parse left")?); }, "event" => { event = value.parse::().map_err(|err| anyhow::anyhow!("invalid event: {}", err) )?; }, "compact" => { if value != "1" { return Err(anyhow::anyhow!("compact set, but not to 1")); } }, "numwant" => { opt_numwant = Some(value.parse::().with_context(|| "parse numwant")?); }, "key" => { if value.len() > 100 { return Err(anyhow::anyhow!("'key' is too long")) } opt_key = Some(value.into()); }, k => { ::log::info!("ignored unrecognized key: {}", k) } } if segment_end == query_string.len(){ break } else { position = segment_end + 1; } } // -- Put together request if location == "/announce" { let request = AnnounceRequest { info_hash: info_hashes.pop().with_context(|| "no info_hash")?, peer_id: opt_peer_id.with_context(|| "no peer_id")?, port: opt_port.with_context(|| "no port")?, bytes_left: opt_bytes_left.with_context(|| "no left")?, event, compact: true, numwant: opt_numwant, key: opt_key, }; Ok(Request::Announce(request)) } else { let request = ScrapeRequest { info_hashes, }; Ok(Request::Scrape(request)) } } /// The info hashes and peer id's that are received are url-encoded byte /// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as /// UTF-8 string, meaning that non-ascii bytes are invalid characters. /// Therefore, these bytes must be converted to their equivalent multi-byte /// UTF-8 encodings. fn urldecode(value: &str) -> anyhow::Result { let mut processed = String::new(); for (i, part) in value.split('%').enumerate(){ if i == 0 { processed.push_str(part); } else if part.len() >= 2 { let mut two_first = String::with_capacity(2); for (j, c) in part.chars().enumerate(){ if j == 0 { two_first.push(c); } else if j == 1 { two_first.push(c); let byte = u8::from_str_radix(&two_first, 16)?; processed.push(byte as char); } else { processed.push(c); } } } else { return Err(anyhow::anyhow!( "url decode: too few characters in '%{}'", part )) } } Ok(processed) } /// Quite a bit faster than non-memchr version fn urldecode_memchr(value: &str) -> anyhow::Result> { let mut processed = SmartString::new(); let bytes = value.as_bytes(); let iter = ::memchr::memchr_iter(b'%', bytes); let mut str_index_after_hex = 0usize; for i in iter { match (bytes.get(i), bytes.get(i + 1), bytes.get(i + 2)){ (Some(0..=127), Some(0..=127), Some(0..=127)) => { if i > 0 { processed.push_str(&value[str_index_after_hex..i]); } str_index_after_hex = i + 3; let hex = &value[i + 1..i + 3]; let byte = u8::from_str_radix(&hex, 16)?; processed.push(byte as char); }, _ => { return Err(anyhow::anyhow!( "invalid urlencoded segment at byte {} in {}", i, value )); } } } if let Some(rest_of_str) = value.get(str_index_after_hex..){ processed.push_str(rest_of_str); } Ok(processed) } fn urldecode_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> { let mut out_arr = [0u8; 20]; let mut chars = value.chars(); for i in 0..20 { let c = chars.next() .with_context(|| "less than 20 chars")?; if c as u32 > 255 { return Err(anyhow::anyhow!( "character not in single byte range: {:#?}", c )); } if c == '%' { let first = chars.next() .with_context(|| "missing first urldecode char in pair")?; let second = chars.next() .with_context(|| "missing second urldecode char in pair")?; let hex = [first as u8, second as u8]; hex::decode_to_slice(&hex, &mut out_arr[i..i+1]).map_err(|err| anyhow::anyhow!("hex decode error: {:?}", err) )?; } else { if c as u32 > 255 { return Err(anyhow::anyhow!( "character not in single byte range: {:#?}", c )); } out_arr[i] = c as u8; } } if chars.next().is_some(){ return Err(anyhow::anyhow!("more than 20 chars")); } Ok(out_arr) } pub fn as_bytes(&self) -> Vec { match self { Self::Announce(r) => r.as_bytes(), Self::Scrape(r) => r.as_bytes(), } } } #[cfg(test)] mod tests { use super::*; static ANNOUNCE_REQUEST_PATH: &str = "/announce?info_hash=%04%0bkV%3f%5cr%14%a6%b7%98%adC%c3%c9.%40%24%00%b9&peer_id=-ABC940-5ert69muw5t8&port=12345&uploaded=0&downloaded=0&left=1&numwant=0&key=4ab4b877&compact=1&supportcrypto=1&event=started"; static SCRAPE_REQUEST_PATH: &str = "/scrape?info_hash=%04%0bkV%3f%5cr%14%a6%b7%98%adC%c3%c9.%40%24%00%b9"; static REFERENCE_INFO_HASH: [u8; 20] = [0x04, 0x0b, b'k', b'V', 0x3f, 0x5c, b'r', 0x14, 0xa6, 0xb7, 0x98, 0xad, b'C', 0xc3, 0xc9, b'.', 0x40, 0x24, 0x00, 0xb9]; static REFERENCE_PEER_ID: [u8; 20] = [b'-', b'A', b'B', b'C', b'9', b'4', b'0', b'-', b'5', b'e', b'r', b't', b'6', b'9', b'm', b'u', b'w', b'5', b't', b'8']; #[test] fn test_urldecode(){ let f = Request::urldecode_memchr; assert_eq!(f("").unwrap(), "".to_string()); assert_eq!(f("abc").unwrap(), "abc".to_string()); assert_eq!(f("%21").unwrap(), "!".to_string()); assert_eq!(f("%21%3D").unwrap(), "!=".to_string()); assert_eq!(f("abc%21def%3Dghi").unwrap(), "abc!def=ghi".to_string()); assert!(f("%").is_err()); assert!(f("%å7").is_err()); } fn get_reference_announce_request() -> Request { Request::Announce(AnnounceRequest { info_hash: InfoHash(REFERENCE_INFO_HASH), peer_id: PeerId(REFERENCE_PEER_ID), port: 12345, bytes_left: 1, event: AnnounceEvent::Started, compact: true, numwant: Some(0), key: Some("4ab4b877".into()) }) } #[test] fn test_announce_request_from_bytes(){ let mut bytes = Vec::new(); bytes.extend_from_slice(b"GET "); bytes.extend_from_slice(&ANNOUNCE_REQUEST_PATH.as_bytes()); bytes.extend_from_slice(b" HTTP/1.1\r\n\r\n"); let parsed_request = Request::from_bytes(&bytes[..]).unwrap(); let reference_request = get_reference_announce_request(); assert_eq!(parsed_request, reference_request); } #[test] fn test_scrape_request_from_bytes(){ let mut bytes = Vec::new(); bytes.extend_from_slice(b"GET "); bytes.extend_from_slice(&SCRAPE_REQUEST_PATH.as_bytes()); bytes.extend_from_slice(b" HTTP/1.1\r\n\r\n"); let parsed_request = Request::from_bytes(&bytes[..]).unwrap(); let reference_request = Request::Scrape(ScrapeRequest { info_hashes: vec![InfoHash(REFERENCE_INFO_HASH)], }); assert_eq!(parsed_request, reference_request); } }