From 52cc7d8acb4540163bb7f749e85bf1e9faf98c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Frosteg=C3=A5rd?= Date: Fri, 3 Jul 2020 17:10:30 +0200 Subject: [PATCH] aquatic_http: write custom deserialize logic for Request --- TODO.md | 7 +- aquatic_http/src/lib/network/connection.rs | 16 +- aquatic_http/src/lib/protocol/mod.rs | 158 +++++++++++------- .../src/lib/protocol/serde_helpers.rs | 155 +++-------------- 4 files changed, 130 insertions(+), 206 deletions(-) diff --git a/TODO.md b/TODO.md index 65fd901..c5f20cb 100644 --- a/TODO.md +++ b/TODO.md @@ -10,11 +10,7 @@ * test tls * request parsing in protocol module instead of in network? Not obvious what error return type to use then -* scrape info hash parsing: multiple ought to be accepted, might need to roll - my own url encode crate. https://github.com/samscott89/serde_qs could be - used with extra preprocessing (`?info_hash[0]=..&info_hash[1]` and so on), - but it pulls in lots of dependencies (actix-web etc), and current - preprocessing is already ugly. +* scrape info hash parsing: does multiple hashes work? * serialization * there is the question of how serialization should be done for 20 byte arrays, such as in the scrape response. There, a 20 byte byte string is @@ -25,7 +21,6 @@ * compact response peers should be forbidden for ipv6 * move stuff to common crate with ws: what about Request/InMessage etc? * don't overdo this - * 20 byte helper ## aquatic_ws * established connections do not get valid_until updated, I think? diff --git a/aquatic_http/src/lib/network/connection.rs b/aquatic_http/src/lib/network/connection.rs index 6973d93..15a9167 100644 --- a/aquatic_http/src/lib/network/connection.rs +++ b/aquatic_http/src/lib/network/connection.rs @@ -19,7 +19,7 @@ use crate::protocol::Request; #[derive(Debug)] pub enum RequestReadError { NeedMoreData, - Invalid, + Invalid(anyhow::Error), StreamEnded, Io(::std::io::Error), Parse(::httparse::Error), @@ -77,16 +77,16 @@ impl EstablishedConnection { match http_request.parse(&self.buf[..self.bytes_read]){ Ok(httparse::Status::Complete(_)) => { - let opt_request = http_request.path.and_then( - Request::from_http_get_path - ); + if let Some(path) = http_request.path { + let res_request = Request::from_http_get_path(path); - self.clear_buffer(); + self.clear_buffer(); - if let Some(request) = opt_request { - Ok(request) + res_request.map_err(RequestReadError::Invalid) } else { - Err(RequestReadError::Invalid) + self.clear_buffer(); + + Err(RequestReadError::Invalid(anyhow::anyhow!("no http path"))) } }, Ok(httparse::Status::Partial) => { diff --git a/aquatic_http/src/lib/protocol/mod.rs b/aquatic_http/src/lib/protocol/mod.rs index 4d5a440..b17215f 100644 --- a/aquatic_http/src/lib/protocol/mod.rs +++ b/aquatic_http/src/lib/protocol/mod.rs @@ -1,6 +1,9 @@ use std::net::IpAddr; +use std::str::FromStr; + +use anyhow::Context; use hashbrown::HashMap; -use serde::{Serialize, Deserialize}; +use serde::Serialize; use crate::common::Peer; @@ -9,29 +12,26 @@ mod serde_helpers; use serde_helpers::*; -#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize)] #[serde(transparent)] pub struct PeerId( #[serde( - deserialize_with = "deserialize_20_bytes", serialize_with = "serialize_20_bytes", )] pub [u8; 20] ); -#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize)] #[serde(transparent)] pub struct InfoHash( #[serde( - deserialize_with = "deserialize_20_bytes", serialize_with = "serialize_20_bytes", )] pub [u8; 20] ); - #[derive(Clone, Copy, Debug, Serialize)] pub struct ResponsePeer { pub ip_address: IpAddr, @@ -51,8 +51,7 @@ impl ResponsePeer { } -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "lowercase")] +#[derive(Debug, Clone)] pub enum AnnounceEvent { Started, Stopped, @@ -68,30 +67,34 @@ impl Default for AnnounceEvent { } -#[derive(Debug, Clone, Deserialize)] +impl FromStr for AnnounceEvent { + type Err = String; + + fn from_str(value: &str) -> std::result::Result { + let event = match value { + "started" => Self::Started, + "stopped" => Self::Stopped, + "completed" => Self::Completed, + _ => Self::default(), + }; + + Ok(event) + } +} + + +#[derive(Debug, Clone)] pub struct AnnounceRequest { pub info_hash: InfoHash, pub peer_id: PeerId, pub port: u16, - #[serde(rename = "left")] pub bytes_left: usize, - #[serde(default)] pub event: AnnounceEvent, - #[serde( - deserialize_with = "deserialize_bool_from_number", - default = "AnnounceRequest::default_compact_value" - )] pub compact: bool, /// Requested number of peers to return pub numwant: usize, } -impl AnnounceRequest { - fn default_compact_value() -> bool { - true - } -} - #[derive(Debug, Clone, Serialize)] pub struct AnnounceResponseSuccess { @@ -113,12 +116,8 @@ pub struct AnnounceResponseFailure { } -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone)] pub struct ScrapeRequest { - #[serde( - rename = "info_hash", - deserialize_with = "deserialize_info_hashes" // FIXME: does this work? - )] pub info_hashes: Vec, } @@ -145,44 +144,97 @@ pub enum Request { impl Request { - pub fn from_http_get_path(path: &str) -> Option { - log::debug!("path: {:?}", path); - + /// Parse Request from http path (GET `/announce?info_hash=...`) + /// + /// Existing serde-url decode crates were insufficient, so the decision was + /// made to create a custom parser. serde_urlencoded doesn't support multiple + /// values with same key, and serde_qs pulls in lots of dependencies. Both + /// would need preprocessing for the binary format used for info_hash and + /// peer_id. + pub fn from_http_get_path(path: &str) -> anyhow::Result { let mut split_parts= path.splitn(2, '?'); - let path = split_parts.next()?; - let query_string = Self::preprocess_query_string(split_parts.next()?); + let location = split_parts.next() + .with_context(|| "no location")?; + let query_string = split_parts.next() + .with_context(|| "no query string")?; - if path == "/announce" { - let result: Result = - serde_urlencoded::from_str(&query_string); + let mut info_hashes = Vec::new(); + let mut data = HashMap::new(); - if let Err(ref err) = result { - log::debug!("error: {}", err); + for part in query_string.split('&'){ + let mut key_and_value = part.splitn(2, '='); + + let key = key_and_value.next() + .with_context(|| format!("no key in {}", part))?; + let value = key_and_value.next() + .with_context(|| format!("no value in {}", part))?; + let value = Self::urldecode(value).to_string(); + + if key == "info_hash" { + info_hashes.push(value); + } else { + data.insert(key, value); } + } - result.ok().map(Request::Announce) + if location == "/announce" { + let request = AnnounceRequest { + info_hash: info_hashes.get(0) + .with_context(|| "no info_hash") + .and_then(|s| deserialize_20_bytes(s)) + .map(InfoHash)?, + peer_id: data.get("peer_id") + .with_context(|| "no peer_id") + .and_then(|s| deserialize_20_bytes(s)) + .map(PeerId)?, + port: data.get("port") + .with_context(|| "no port") + .and_then(|s| s.parse() + .map_err(|err| anyhow::anyhow!("parse 'port': {}", err)))?, + bytes_left: data.get("left") + .with_context(|| "no left") + .and_then(|s| s.parse() + .map_err(|err| anyhow::anyhow!("parse 'left': {}", err)))?, + event: data.get("event") + .and_then(|s| s.parse().ok()) + .unwrap_or_default(), + compact: data.get("compact") + .map(|s| s == "1") + .unwrap_or(true), + numwant: data.get("numwant") + .with_context(|| "no numwant") + .and_then(|s| s.parse() + .map_err(|err| + anyhow::anyhow!("parse 'numwant': {}", err) + ))?, + }; + + Ok(Request::Announce(request)) } else { - let result: Result = - serde_urlencoded::from_str(&query_string); + let mut parsed_info_hashes = Vec::with_capacity(info_hashes.len()); - if let Err(ref err) = result { - log::debug!("error: {}", err); + for info_hash in info_hashes { + parsed_info_hashes.push(InfoHash(deserialize_20_bytes(&info_hash)?)); } - result.ok().map(Request::Scrape) + let request = ScrapeRequest { + info_hashes: parsed_info_hashes, + }; + + Ok(Request::Scrape(request)) } } /// The info hashes and peer id's that are received are url-encoded byte - /// by byte, e.g., %fa for byte 0xfa. However, they are parsed as an UTF-8 - /// string, meaning that non-ascii bytes are invalid characters. Therefore, - /// these bytes must be converted to their equivalent multi-byte UTF-8 - /// encodings first. - fn preprocess_query_string(query_string: &str) -> String { + /// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as + /// UTF-8 string, meaning that non-ascii bytes are invalid characters. + /// Therefore, these bytes must be converted to their equivalent multi-byte + /// UTF-8 encodings. + fn urldecode(value: &str) -> String { let mut processed = String::new(); - for (i, part) in query_string.split('%').enumerate(){ + for (i, part) in value.split('%').enumerate(){ if i == 0 { processed.push_str(part); } else if part.len() >= 2 { @@ -199,15 +251,7 @@ impl Request { let byte = u8::from_str_radix(&two_first, 16).unwrap(); - let mut tmp = [0u8; 4]; - - let slice = (byte as char).encode_utf8(&mut tmp); - - for byte in slice.bytes(){ - processed.push('%'); - processed.push_str(&format!("{:02x}", byte)); - } - + processed.push(byte as char); processed.push_str(&rest); } } diff --git a/aquatic_http/src/lib/protocol/serde_helpers.rs b/aquatic_http/src/lib/protocol/serde_helpers.rs index f599ce5..2236dab 100644 --- a/aquatic_http/src/lib/protocol/serde_helpers.rs +++ b/aquatic_http/src/lib/protocol/serde_helpers.rs @@ -1,93 +1,36 @@ use std::net::IpAddr; -use serde::{Serializer, Deserializer, de::{Visitor, SeqAccess}}; +use serde::Serializer; -use super::{InfoHash, ResponsePeer}; - - -struct BoolFromNumberVisitor; - -impl<'de> Visitor<'de> for BoolFromNumberVisitor { - type Value = bool; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("1 for true, 0 for false") - } - - #[inline] - fn visit_str(self, value: &str) -> Result - where E: ::serde::de::Error, - { - if value == "0" { - Ok(false) - } else if value == "1" { - Ok(true) - } else { - Err(E::custom(format!("not 0 or 1: {}", value))) - } - } -} - - -#[inline] -pub fn deserialize_bool_from_number<'de, D>( - deserializer: D -) -> Result - where D: Deserializer<'de> -{ - deserializer.deserialize_any(BoolFromNumberVisitor) -} +use super::ResponsePeer; -/// Decode string of 20 byte-size chars to a [u8; 20] -struct TwentyByteVisitor; +/// Not for serde +pub fn deserialize_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> { + let mut arr = [0u8; 20]; + let mut char_iter = value.chars(); -impl<'de> Visitor<'de> for TwentyByteVisitor { - type Value = [u8; 20]; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("string consisting of 20 bytes") - } - - #[inline] - fn visit_str(self, value: &str) -> Result - where E: ::serde::de::Error, - { - let mut arr = [0u8; 20]; - let mut char_iter = value.chars(); - - for a in arr.iter_mut(){ - if let Some(c) = char_iter.next(){ - if c as u32 > 255 { - return Err(E::custom(format!( - "character not in single byte range: {:#?}", - c - ))); - } - - *a = c as u8; - } else { - return Err(E::custom(format!("less than 20 bytes: {:#?}", value))); + for a in arr.iter_mut(){ + if let Some(c) = char_iter.next(){ + if c as u32 > 255 { + return Err(anyhow::anyhow!( + "character not in single byte range: {:#?}", + c + )); } - } - if char_iter.next().is_some(){ - Err(E::custom(format!("more than 20 bytes: {:#?}", value))) + *a = c as u8; } else { - Ok(arr) + return Err(anyhow::anyhow!("less than 20 bytes: {:#?}", value)); } } -} - -#[inline] -pub fn deserialize_20_bytes<'de, D>( - deserializer: D -) -> Result<[u8; 20], D::Error> - where D: Deserializer<'de> -{ - deserializer.deserialize_any(TwentyByteVisitor) + if char_iter.next().is_some(){ + Err(anyhow::anyhow!("more than 20 bytes: {:#?}", value)) + } else { + Ok(arr) + } } @@ -100,64 +43,6 @@ pub fn serialize_20_bytes( } -pub struct InfoHashVecVisitor; - - -impl<'de> Visitor<'de> for InfoHashVecVisitor { - type Value = Vec; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("string or array of strings consisting of 20 bytes") - } - - #[inline] - fn visit_str(self, value: &str) -> Result - where E: ::serde::de::Error, - { - match TwentyByteVisitor::visit_str::(TwentyByteVisitor, value){ - Ok(arr) => Ok(vec![InfoHash(arr)]), - Err(err) => Err(E::custom(format!("got string, but {}", err))) - } - } - - #[inline] - fn visit_seq(self, mut seq: A) -> Result - where A: SeqAccess<'de> - { - let mut info_hashes: Self::Value = Vec::new(); - - while let Ok(Some(value)) = seq.next_element::<&str>(){ - let arr = TwentyByteVisitor::visit_str( - TwentyByteVisitor, value - )?; - - info_hashes.push(InfoHash(arr)); - } - - Ok(info_hashes) - } - - #[inline] - fn visit_none(self) -> Result - where E: ::serde::de::Error - { - Ok(vec![]) - } -} - - -/// Empty vector is returned if value is null or any invalid info hash -/// is present -#[inline] -pub fn deserialize_info_hashes<'de, D>( - deserializer: D -) -> Result, D::Error> - where D: Deserializer<'de>, -{ - Ok(deserializer.deserialize_any(InfoHashVecVisitor).unwrap_or_default()) -} - - pub fn serialize_response_peers_compact( response_peers: &Vec, serializer: S