aquatic_http: Request::from_http_get_path: add memchr query str parser

This commit is contained in:
Joakim Frostegård 2020-07-18 18:40:27 +02:00
parent a8900c99ab
commit 9b0956cc91
2 changed files with 70 additions and 18 deletions

View file

@ -9,8 +9,8 @@
and maybe run scripts should be adjusted
## aquatic_http
* faster Request creation (splitn functions) using memchr? possibly
iterate over several bytes (& and =)
* request parsing: tests and benchmarks of the various helper functions,
as well as tests of main parsing function
* test torrent transfer with real clients
* test tls
* current serialized byte strings valid

View file

@ -52,22 +52,12 @@ impl Request {
let mut info_hashes = Vec::new();
let mut data = HashMap::new();
for part in query_string.split('&'){
let mut key_and_value = part.splitn(2, '=');
let key = key_and_value.next()
.with_context(|| format!("no key in {}", part))?;
let value = key_and_value.next()
.with_context(|| format!("no value in {}", part))?;
let value = Self::urldecode_memchr(value)?;
if key == "info_hash" {
info_hashes.push(value);
} else {
data.insert(key, value);
}
}
Self::parse_key_value_pairs_memchr(
&mut info_hashes,
&mut data,
query_string
)?;
if location == "/announce" {
let numwant = if let Some(s) = data.get("numwant"){
@ -133,6 +123,68 @@ impl Request {
}
}
/// Split `query_string` into `key=value` pairs using the standard library
/// string splitting functions.
///
/// The string is split on `&`, and every resulting part is split at its
/// first `=` only. The value is passed through `Self::urldecode_memchr`.
/// Values belonging to the key `info_hash` are appended to `info_hashes`;
/// every other pair is inserted into `data`, where a later occurrence of a
/// key replaces an earlier one.
///
/// Returns an error if a part contains no `=`, or if decoding a value fails.
fn parse_key_value_pairs<'a>(
    info_hashes: &mut Vec<String>,
    data: &mut HashMap<&'a str, String>,
    query_string: &'a str,
) -> anyhow::Result<()> {
    query_string
        .split('&')
        .try_for_each(|segment| -> anyhow::Result<()> {
            // At most two pieces: the text before the first '=' and the
            // remainder, which may itself contain further '=' characters.
            let mut pieces = segment.splitn(2, '=');

            let name = pieces
                .next()
                .with_context(|| format!("no key in {}", segment))?;
            let raw_value = pieces
                .next()
                .with_context(|| format!("no value in {}", segment))?;

            let decoded = Self::urldecode_memchr(raw_value)?;

            match name {
                "info_hash" => info_hashes.push(decoded),
                _ => {
                    data.insert(name, decoded);
                }
            }

            Ok(())
        })
}
// Seems to be a bit faster than non-memchr version
fn parse_key_value_pairs_memchr<'a>(
info_hashes: &mut Vec<String>,
data: &mut HashMap<&'a str, String>,
query_string: &'a str,
) -> anyhow::Result<()> {
let query_string_bytes = query_string.as_bytes();
let mut ampersand_iter = ::memchr::memchr_iter(b'&', query_string_bytes);
let mut position = 0usize;
for equal_sign_index in ::memchr::memchr_iter(b'=', query_string_bytes){
let segment_end = ampersand_iter.next()
.unwrap_or(query_string.len());
let key = query_string.get(position..equal_sign_index)
.with_context(|| format!("no key at {}..{}", position, equal_sign_index))?;
let value = query_string.get(equal_sign_index + 1..segment_end)
.with_context(|| format!("no value at {}..{}", equal_sign_index + 1, segment_end))?;
let value = Self::urldecode_memchr(value)?;
if key == "info_hash" {
info_hashes.push(value);
} else {
data.insert(key, value);
}
position = segment_end + 1;
if position == query_string.len(){
break;
}
}
Ok(())
}
/// The info hashes and peer IDs that are received are url-encoded byte
/// by byte, e.g., %fa for byte 0xfa. However, they need to be parsed as
/// a UTF-8 string, meaning that non-ascii bytes are invalid characters.