aquatic_http: write custom deserialize logic for Request

This commit is contained in:
Joakim Frostegård 2020-07-03 17:10:30 +02:00
parent 501c2a293a
commit 52cc7d8acb
4 changed files with 130 additions and 206 deletions

View file

@ -10,11 +10,7 @@
* test tls
* request parsing in protocol module instead of in network? Not obvious
what error return type to use then
* scrape info hash parsing: multiple ought to be accepted, might need to roll
my own url encode crate. https://github.com/samscott89/serde_qs could be
used with extra preprocessing (`?info_hash[0]=..&info_hash[1]` and so on),
but it pulls in lots of dependencies (actix-web etc), and current
preprocessing is already ugly.
* scrape info hash parsing: does multiple hashes work?
* serialization
* there is the question of how serialization should be done for 20 byte
arrays, such as in the scrape response. There, a 20 byte byte string is
@ -25,7 +21,6 @@
* compact response peers should be forbidden for ipv6
* move stuff to common crate with ws: what about Request/InMessage etc?
* don't overdo this
* 20 byte helper
## aquatic_ws
* established connections do not get valid_until updated, I think?

View file

@ -19,7 +19,7 @@ use crate::protocol::Request;
#[derive(Debug)]
pub enum RequestReadError {
NeedMoreData,
Invalid,
Invalid(anyhow::Error),
StreamEnded,
Io(::std::io::Error),
Parse(::httparse::Error),
@ -77,16 +77,16 @@ impl EstablishedConnection {
match http_request.parse(&self.buf[..self.bytes_read]){
Ok(httparse::Status::Complete(_)) => {
let opt_request = http_request.path.and_then(
Request::from_http_get_path
);
if let Some(path) = http_request.path {
let res_request = Request::from_http_get_path(path);
self.clear_buffer();
if let Some(request) = opt_request {
Ok(request)
res_request.map_err(RequestReadError::Invalid)
} else {
Err(RequestReadError::Invalid)
self.clear_buffer();
Err(RequestReadError::Invalid(anyhow::anyhow!("no http path")))
}
},
Ok(httparse::Status::Partial) => {

View file

@ -1,6 +1,9 @@
use std::net::IpAddr;
use std::str::FromStr;
use anyhow::Context;
use hashbrown::HashMap;
use serde::{Serialize, Deserialize};
use serde::Serialize;
use crate::common::Peer;
@ -9,29 +12,26 @@ mod serde_helpers;
use serde_helpers::*;
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize)]
#[serde(transparent)]
pub struct PeerId(
#[serde(
deserialize_with = "deserialize_20_bytes",
serialize_with = "serialize_20_bytes",
)]
pub [u8; 20]
);
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize)]
#[serde(transparent)]
pub struct InfoHash(
#[serde(
deserialize_with = "deserialize_20_bytes",
serialize_with = "serialize_20_bytes",
)]
pub [u8; 20]
);
#[derive(Clone, Copy, Debug, Serialize)]
pub struct ResponsePeer {
pub ip_address: IpAddr,
@ -51,8 +51,7 @@ impl ResponsePeer {
}
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "lowercase")]
#[derive(Debug, Clone)]
pub enum AnnounceEvent {
Started,
Stopped,
@ -68,30 +67,34 @@ impl Default for AnnounceEvent {
}
#[derive(Debug, Clone, Deserialize)]
impl FromStr for AnnounceEvent {
type Err = String;
fn from_str(value: &str) -> std::result::Result<Self, String> {
let event = match value {
"started" => Self::Started,
"stopped" => Self::Stopped,
"completed" => Self::Completed,
_ => Self::default(),
};
Ok(event)
}
}
#[derive(Debug, Clone)]
pub struct AnnounceRequest {
pub info_hash: InfoHash,
pub peer_id: PeerId,
pub port: u16,
#[serde(rename = "left")]
pub bytes_left: usize,
#[serde(default)]
pub event: AnnounceEvent,
#[serde(
deserialize_with = "deserialize_bool_from_number",
default = "AnnounceRequest::default_compact_value"
)]
pub compact: bool,
/// Requested number of peers to return
pub numwant: usize,
}
impl AnnounceRequest {
fn default_compact_value() -> bool {
true
}
}
#[derive(Debug, Clone, Serialize)]
pub struct AnnounceResponseSuccess {
@ -113,12 +116,8 @@ pub struct AnnounceResponseFailure {
}
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone)]
pub struct ScrapeRequest {
#[serde(
rename = "info_hash",
deserialize_with = "deserialize_info_hashes" // FIXME: does this work?
)]
pub info_hashes: Vec<InfoHash>,
}
@ -145,44 +144,97 @@ pub enum Request {
impl Request {
pub fn from_http_get_path(path: &str) -> Option<Self> {
log::debug!("path: {:?}", path);
/// Parse Request from http path (GET `/announce?info_hash=...`)
///
/// Existing serde-url decode crates were insufficient, so the decision was
/// made to create a custom parser. serde_urlencoded doesn't support multiple
/// values with same key, and serde_qs pulls in lots of dependencies. Both
/// would need preprocessing for the binary format used for info_hash and
/// peer_id.
pub fn from_http_get_path(path: &str) -> anyhow::Result<Self> {
let mut split_parts= path.splitn(2, '?');
let path = split_parts.next()?;
let query_string = Self::preprocess_query_string(split_parts.next()?);
let location = split_parts.next()
.with_context(|| "no location")?;
let query_string = split_parts.next()
.with_context(|| "no query string")?;
if path == "/announce" {
let result: Result<AnnounceRequest, serde_urlencoded::de::Error> =
serde_urlencoded::from_str(&query_string);
let mut info_hashes = Vec::new();
let mut data = HashMap::new();
if let Err(ref err) = result {
log::debug!("error: {}", err);
}
for part in query_string.split('&'){
let mut key_and_value = part.splitn(2, '=');
result.ok().map(Request::Announce)
let key = key_and_value.next()
.with_context(|| format!("no key in {}", part))?;
let value = key_and_value.next()
.with_context(|| format!("no value in {}", part))?;
let value = Self::urldecode(value).to_string();
if key == "info_hash" {
info_hashes.push(value);
} else {
let result: Result<ScrapeRequest, serde_urlencoded::de::Error> =
serde_urlencoded::from_str(&query_string);
if let Err(ref err) = result {
log::debug!("error: {}", err);
data.insert(key, value);
}
}
result.ok().map(Request::Scrape)
if location == "/announce" {
let request = AnnounceRequest {
info_hash: info_hashes.get(0)
.with_context(|| "no info_hash")
.and_then(|s| deserialize_20_bytes(s))
.map(InfoHash)?,
peer_id: data.get("peer_id")
.with_context(|| "no peer_id")
.and_then(|s| deserialize_20_bytes(s))
.map(PeerId)?,
port: data.get("port")
.with_context(|| "no port")
.and_then(|s| s.parse()
.map_err(|err| anyhow::anyhow!("parse 'port': {}", err)))?,
bytes_left: data.get("left")
.with_context(|| "no left")
.and_then(|s| s.parse()
.map_err(|err| anyhow::anyhow!("parse 'left': {}", err)))?,
event: data.get("event")
.and_then(|s| s.parse().ok())
.unwrap_or_default(),
compact: data.get("compact")
.map(|s| s == "1")
.unwrap_or(true),
numwant: data.get("numwant")
.with_context(|| "no numwant")
.and_then(|s| s.parse()
.map_err(|err|
anyhow::anyhow!("parse 'numwant': {}", err)
))?,
};
Ok(Request::Announce(request))
} else {
let mut parsed_info_hashes = Vec::with_capacity(info_hashes.len());
for info_hash in info_hashes {
parsed_info_hashes.push(InfoHash(deserialize_20_bytes(&info_hash)?));
}
let request = ScrapeRequest {
info_hashes: parsed_info_hashes,
};
Ok(Request::Scrape(request))
}
}
/// The info hashes and peer id's that are received are url-encoded byte
/// by byte, e.g., %fa for byte 0xfa. However, they are parsed as an UTF-8
/// string, meaning that non-ascii bytes are invalid characters. Therefore,
/// these bytes must be converted to their equivalent multi-byte UTF-8
/// encodings first.
fn preprocess_query_string(query_string: &str) -> String {
/// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as
/// UTF-8 string, meaning that non-ascii bytes are invalid characters.
/// Therefore, these bytes must be converted to their equivalent multi-byte
/// UTF-8 encodings.
fn urldecode(value: &str) -> String {
let mut processed = String::new();
for (i, part) in query_string.split('%').enumerate(){
for (i, part) in value.split('%').enumerate(){
if i == 0 {
processed.push_str(part);
} else if part.len() >= 2 {
@ -199,15 +251,7 @@ impl Request {
let byte = u8::from_str_radix(&two_first, 16).unwrap();
let mut tmp = [0u8; 4];
let slice = (byte as char).encode_utf8(&mut tmp);
for byte in slice.bytes(){
processed.push('%');
processed.push_str(&format!("{:02x}", byte));
}
processed.push(byte as char);
processed.push_str(&rest);
}
}

View file

@ -1,93 +1,36 @@
use std::net::IpAddr;
use serde::{Serializer, Deserializer, de::{Visitor, SeqAccess}};
use serde::Serializer;
use super::{InfoHash, ResponsePeer};
struct BoolFromNumberVisitor;
impl<'de> Visitor<'de> for BoolFromNumberVisitor {
type Value = bool;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("1 for true, 0 for false")
}
#[inline]
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where E: ::serde::de::Error,
{
if value == "0" {
Ok(false)
} else if value == "1" {
Ok(true)
} else {
Err(E::custom(format!("not 0 or 1: {}", value)))
}
}
}
#[inline]
pub fn deserialize_bool_from_number<'de, D>(
deserializer: D
) -> Result<bool, D::Error>
where D: Deserializer<'de>
{
deserializer.deserialize_any(BoolFromNumberVisitor)
}
use super::ResponsePeer;
/// Decode string of 20 byte-size chars to a [u8; 20]
struct TwentyByteVisitor;
impl<'de> Visitor<'de> for TwentyByteVisitor {
type Value = [u8; 20];
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("string consisting of 20 bytes")
}
#[inline]
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where E: ::serde::de::Error,
{
/// Not for serde
pub fn deserialize_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> {
let mut arr = [0u8; 20];
let mut char_iter = value.chars();
for a in arr.iter_mut(){
if let Some(c) = char_iter.next(){
if c as u32 > 255 {
return Err(E::custom(format!(
return Err(anyhow::anyhow!(
"character not in single byte range: {:#?}",
c
)));
));
}
*a = c as u8;
} else {
return Err(E::custom(format!("less than 20 bytes: {:#?}", value)));
return Err(anyhow::anyhow!("less than 20 bytes: {:#?}", value));
}
}
if char_iter.next().is_some(){
Err(E::custom(format!("more than 20 bytes: {:#?}", value)))
Err(anyhow::anyhow!("more than 20 bytes: {:#?}", value))
} else {
Ok(arr)
}
}
}
#[inline]
pub fn deserialize_20_bytes<'de, D>(
deserializer: D
) -> Result<[u8; 20], D::Error>
where D: Deserializer<'de>
{
deserializer.deserialize_any(TwentyByteVisitor)
}
@ -100,64 +43,6 @@ pub fn serialize_20_bytes<S>(
}
pub struct InfoHashVecVisitor;
impl<'de> Visitor<'de> for InfoHashVecVisitor {
type Value = Vec<InfoHash>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("string or array of strings consisting of 20 bytes")
}
#[inline]
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
where E: ::serde::de::Error,
{
match TwentyByteVisitor::visit_str::<E>(TwentyByteVisitor, value){
Ok(arr) => Ok(vec![InfoHash(arr)]),
Err(err) => Err(E::custom(format!("got string, but {}", err)))
}
}
#[inline]
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where A: SeqAccess<'de>
{
let mut info_hashes: Self::Value = Vec::new();
while let Ok(Some(value)) = seq.next_element::<&str>(){
let arr = TwentyByteVisitor::visit_str(
TwentyByteVisitor, value
)?;
info_hashes.push(InfoHash(arr));
}
Ok(info_hashes)
}
#[inline]
fn visit_none<E>(self) -> Result<Self::Value, E>
where E: ::serde::de::Error
{
Ok(vec![])
}
}
/// Empty vector is returned if value is null or any invalid info hash
/// is present
#[inline]
pub fn deserialize_info_hashes<'de, D>(
deserializer: D
) -> Result<Vec<InfoHash>, D::Error>
where D: Deserializer<'de>,
{
Ok(deserializer.deserialize_any(InfoHashVecVisitor).unwrap_or_default())
}
pub fn serialize_response_peers_compact<S>(
response_peers: &Vec<ResponsePeer>,
serializer: S