From f28abbb7f6b0ad12bd680650b4d47b84b635077e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Frosteg=C3=A5rd?= Date: Fri, 3 Jul 2020 13:06:09 +0200 Subject: [PATCH] aquatic_http: fix issue with incorrect parsing of info_hash, peer_id --- TODO.md | 2 -- aquatic_http/src/lib/protocol/mod.rs | 51 ++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/TODO.md b/TODO.md index 816959d..7b2d931 100644 --- a/TODO.md +++ b/TODO.md @@ -12,8 +12,6 @@ what error return type to use then * compact peer representation in announce response: is implementation correct? * scrape info hash parsing: multiple ought to be accepted -* info hashes, peer ids: check that whole deserialization and url decoding - works as it should. There are suspicously many `\u{fffd}` * move stuff to common crate with ws: what about Request/InMessage etc? * don't overdo this diff --git a/aquatic_http/src/lib/protocol/mod.rs b/aquatic_http/src/lib/protocol/mod.rs index 28182be..933a9a9 100644 --- a/aquatic_http/src/lib/protocol/mod.rs +++ b/aquatic_http/src/lib/protocol/mod.rs @@ -148,10 +148,11 @@ impl Request { let mut split_parts= path.splitn(2, '?'); let path = split_parts.next()?; - let query_string = split_parts.next()?; + let query_string = Self::preprocess_query_string(split_parts.next()?); if path == "/announce" { - let result: Result = serde_urlencoded::from_str(query_string); + let result: Result = + serde_urlencoded::from_str(&query_string); if let Err(ref err) = result { log::debug!("error: {}", err); @@ -159,7 +160,8 @@ impl Request { result.ok().map(Request::Announce) } else { - let result: Result = serde_urlencoded::from_str(query_string); + let result: Result = + serde_urlencoded::from_str(&query_string); if let Err(ref err) = result { log::debug!("error: {}", err); @@ -168,6 +170,49 @@ impl Request { result.ok().map(Request::Scrape) } } + + /// The info hashes and peer id's that are received are url-encoded byte + /// by byte, e.g., %fa for byte 0xfa. 
/// Rewrite the percent-escapes of a raw query string so that every escaped
/// byte survives being decoded as UTF-8 text.
///
/// Info hashes and peer ids arrive url-encoded byte by byte (e.g. `%fa` for
/// the byte 0xfa), but the query string is later parsed as a UTF-8 string,
/// where a lone non-ASCII byte is invalid. Each `%XX` escape is therefore
/// replaced by the percent-encoded UTF-8 representation of the code point
/// U+00XX: escapes below 0x80 keep their value (hex digits are emitted in
/// lower case), escapes from 0x80 upwards become two escapes, e.g.
/// `%fa` -> `%c3%ba`.
///
/// A `%` that is not followed by two ASCII hex digits is not an escape. It
/// is copied to the output unchanged, together with the text after it, so
/// that malformed input never panics and never loses data here.
///
/// Returns the rewritten query string; text outside of escapes is copied
/// verbatim.
fn preprocess_query_string(query_string: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // The output is at least as long as the input; high bytes make it grow.
    let mut processed = String::with_capacity(query_string.len());

    let mut parts = query_string.split('%');

    // Text before the first '%' cannot contain an escape.
    if let Some(head) = parts.next() {
        processed.push_str(head);
    }

    for part in parts {
        // `get(..2)` yields `None` when the part is too short or when byte
        // index 2 is not a char boundary. The explicit digit check rejects
        // input such as "+f", which `from_str_radix` alone would accept.
        let escaped_byte = part
            .get(..2)
            .filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
            .and_then(|digits| u8::from_str_radix(digits, 16).ok());

        match escaped_byte {
            Some(byte) => {
                // Interpret the byte as the code point U+00XX and emit that
                // code point's UTF-8 bytes, each one percent-encoded.
                let mut buf = [0u8; 4];

                for utf8_byte in char::from(byte).encode_utf8(&mut buf).bytes() {
                    processed.push('%');
                    processed.push(char::from(HEX_DIGITS[usize::from(utf8_byte >> 4)]));
                    processed.push(char::from(HEX_DIGITS[usize::from(utf8_byte & 0x0f)]));
                }

                // Two ASCII hex digits were consumed, so 2 is a char boundary.
                processed.push_str(&part[2..]);
            }
            None => {
                // Not a valid escape: restore the '%' removed by `split` and
                // keep the text as it was.
                processed.push('%');
                processed.push_str(part);
            }
        }
    }

    processed
}