From f28abbb7f6b0ad12bd680650b4d47b84b635077e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Frosteg=C3=A5rd?= <joakim.frostegard@gmail.com>
Date: Fri, 3 Jul 2020 13:06:09 +0200
Subject: [PATCH] aquatic_http: fix issue with incorrect parsing of info_hash,
 peer_id

---
 TODO.md                              |  2 --
 aquatic_http/src/lib/protocol/mod.rs | 51 ++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/TODO.md b/TODO.md
index 816959d..7b2d931 100644
--- a/TODO.md
+++ b/TODO.md
@@ -12,8 +12,6 @@
   what error return type to use then
 * compact peer representation in announce response: is implementation correct?
 * scrape info hash parsing: multiple ought to be accepted
-* info hashes, peer ids: check that whole deserialization and url decoding
-  works as it should. There are suspicously many `\u{fffd}`
 * move stuff to common crate with ws: what about Request/InMessage etc?
   * don't overdo this
 
diff --git a/aquatic_http/src/lib/protocol/mod.rs b/aquatic_http/src/lib/protocol/mod.rs
index 28182be..933a9a9 100644
--- a/aquatic_http/src/lib/protocol/mod.rs
+++ b/aquatic_http/src/lib/protocol/mod.rs
@@ -148,10 +148,11 @@ impl Request {
         let mut split_parts= path.splitn(2, '?');
 
         let path = split_parts.next()?;
-        let query_string = split_parts.next()?;
+        let query_string = Self::preprocess_query_string(split_parts.next()?);
 
         if path == "/announce" {
-            let result: Result<AnnounceRequest, serde_urlencoded::de::Error> = serde_urlencoded::from_str(query_string);
+            let result: Result<AnnounceRequest, serde_urlencoded::de::Error> =
+                serde_urlencoded::from_str(&query_string);
 
             if let Err(ref err) = result {
                 log::debug!("error: {}", err);
@@ -159,7 +160,8 @@ impl Request {
 
             result.ok().map(Request::Announce)
         } else {
-            let result: Result<ScrapeRequest, serde_urlencoded::de::Error> = serde_urlencoded::from_str(query_string);
+            let result: Result<ScrapeRequest, serde_urlencoded::de::Error> =
+                serde_urlencoded::from_str(&query_string);
 
             if let Err(ref err) = result {
                 log::debug!("error: {}", err);
@@ -168,6 +170,49 @@ impl Request {
             result.ok().map(Request::Scrape)
         }
     }
+
+    /// Re-encodes percent-escapes so raw bytes survive UTF-8 query parsing.
+    ///
+    /// Info hashes and peer ids arrive url-encoded byte by byte (`%fa` for
+    /// byte 0xfa), but the query string is later decoded as UTF-8 text, in
+    /// which a lone non-ASCII byte is invalid. Every valid `%XX` escape is
+    /// therefore rewritten as the escaped UTF-8 encoding of code point U+00XX
+    /// (`%fa` becomes `%c3%ba`). The input is untrusted, so a `%` that is not
+    /// followed by two hex digits is copied through unchanged instead of
+    /// panicking or being dropped.
+    fn preprocess_query_string(query_string: &str) -> String {
+        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
+
+        let mut segments = query_string.split('%');
+        let mut processed = String::with_capacity(query_string.len());
+        // Everything before the first '%' contains no escapes.
+        processed.push_str(segments.next().unwrap_or(""));
+
+        for segment in segments {
+            // `get` is None for short segments and non-char-boundary splits;
+            // the digit check rejects the sign `from_str_radix` would accept.
+            let byte = segment
+                .get(..2)
+                .filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
+                .and_then(|digits| u8::from_str_radix(digits, 16).ok());
+
+            if let Some(byte) = byte {
+                let mut buf = [0u8; 4];
+                for b in char::from(byte).encode_utf8(&mut buf).bytes() {
+                    processed.push('%');
+                    processed.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
+                    processed.push(char::from(HEX_DIGITS[usize::from(b & 0x0f)]));
+                }
+                processed.push_str(&segment[2..]);
+            } else {
+                // Malformed escape: keep it literally.
+                processed.push('%');
+                processed.push_str(segment);
+            }
+        }
+
+        processed
+    }
 }