aquatic_http: rewrite request parsing, use smartstring

Use smartstring in PeerMapKey too.

Improves benchmark speed.

request-from-path:

time:   [2.1484 us 2.1530 us 2.1586 us]
change: [-24.246% -23.908% -23.570%] (p = 0.00 < 0.01)
Performance has improved.
This commit is contained in:
Joakim Frostegård 2020-07-19 15:21:11 +02:00
parent fc9b4c8e0d
commit 9df1f0ecc6
10 changed files with 1069 additions and 1057 deletions

16
Cargo.lock generated
View file

@ -95,6 +95,7 @@ dependencies = [
"rand", "rand",
"serde", "serde",
"simplelog", "simplelog",
"smartstring",
] ]
[[package]] [[package]]
@ -1644,6 +1645,15 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4"
[[package]]
name = "smartstring"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bdec7d62192ad94e7b16d49856c007d81ae1cf541cd29b4c04b755df39fd80d"
dependencies = [
"static_assertions",
]
[[package]] [[package]]
name = "socket2" name = "socket2"
version = "0.3.12" version = "0.3.12"
@ -1662,6 +1672,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.34" version = "1.0.34"

View file

@ -24,7 +24,7 @@
### less important ### less important
* use fastrand instead of rand? (also for ws and udp then I guess because of * use fastrand instead of rand? (also for ws and udp then I guess because of
shared function) shared function)
* use smartstring crate for announce request key and failure response reason? * use smartstring crate for failure response reason?
* request parsing in protocol module instead of in network, maybe from byte * request parsing in protocol module instead of in network, maybe from byte
buffer? Not obvious what error return type to use then (anyhow maybe?) buffer? Not obvious what error return type to use then (anyhow maybe?)
* log more info for all log modes (function location etc)? also for aquatic_ws * log more info for all log modes (function location etc)? also for aquatic_ws

View file

@ -40,6 +40,7 @@ privdrop = "0.3"
rand = { version = "0.7", features = ["small_rng"] } rand = { version = "0.7", features = ["small_rng"] }
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
simplelog = "0.8" simplelog = "0.8"
smartstring = "0.2"
[dev-dependencies] [dev-dependencies]
criterion = "0.3" criterion = "0.3"

View file

@ -8,6 +8,7 @@ use indexmap::IndexMap;
use log::error; use log::error;
use mio::Token; use mio::Token;
use parking_lot::Mutex; use parking_lot::Mutex;
use smartstring::{SmartString, LazyCompact};
pub use aquatic_common::{ValidUntil, convert_ipv4_mapped_ipv4}; pub use aquatic_common::{ValidUntil, convert_ipv4_mapped_ipv4};
@ -90,7 +91,7 @@ impl <I: Ip>Peer<I> {
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PeerMapKey<I: Ip> { pub struct PeerMapKey<I: Ip> {
pub peer_id: PeerId, pub peer_id: PeerId,
pub ip_or_key: Either<I, String> pub ip_or_key: Either<I, SmartString<LazyCompact>>
} }

View file

@ -1,5 +1,6 @@
use anyhow::Context; use anyhow::Context;
use hashbrown::HashMap; use hashbrown::HashMap;
use smartstring::{SmartString, LazyCompact};
use super::common::*; use super::common::*;
use super::utils::*; use super::utils::*;
@ -15,7 +16,7 @@ pub struct AnnounceRequest {
pub compact: bool, pub compact: bool,
/// Number of response peers wanted /// Number of response peers wanted
pub numwant: Option<usize>, pub numwant: Option<usize>,
pub key: Option<String>, pub key: Option<SmartString<LazyCompact>>,
} }
@ -79,30 +80,48 @@ impl Request {
} else { } else {
None None
}; };
let port = if let Some(port) = data.remove("port"){
port.parse().with_context(|| "parse port")?
} else {
return Err(anyhow::anyhow!("no port"));
};
let bytes_left = if let Some(left) = data.remove("left"){
left.parse().with_context(|| "parse bytes left")?
} else {
return Err(anyhow::anyhow!("no left"));
};
let event = if let Some(event) = data.remove("event"){
if let Ok(event) = event.parse(){
event
} else {
return Err(anyhow::anyhow!("invalid event: {}", event));
}
} else {
AnnounceEvent::default()
};
let compact = if let Some(compact) = data.remove("compact"){
if compact.as_str() == "1" {
true
} else {
return Err(anyhow::anyhow!("compact set, but not to 1"));
}
} else {
true
};
let request = AnnounceRequest { let request = AnnounceRequest {
info_hash: info_hashes.get(0) info_hash: info_hashes.pop()
.with_context(|| "no info_hash") .with_context(|| "no info_hash")
.and_then(|s| deserialize_20_bytes(s)) .and_then(deserialize_20_bytes)
.map(InfoHash)?, .map(InfoHash)?,
peer_id: data.get("peer_id") peer_id: data.remove("peer_id")
.with_context(|| "no peer_id") .with_context(|| "no peer_id")
.and_then(|s| deserialize_20_bytes(s)) .and_then(deserialize_20_bytes)
.map(PeerId)?, .map(PeerId)?,
port: data.remove("port") port,
.with_context(|| "no port") bytes_left,
.and_then(|s| s.parse() event,
.map_err(|err| anyhow::anyhow!("parse 'port': {}", err)))?, compact,
bytes_left: data.remove("left")
.with_context(|| "no left")
.and_then(|s| s.parse()
.map_err(|err| anyhow::anyhow!("parse 'left': {}", err)))?,
event: data.remove("event")
.and_then(|s| s.parse().ok())
.unwrap_or_default(),
compact: data.remove("compact")
.map(|s| s == "1")
.unwrap_or(true),
numwant, numwant,
key, key,
}; };
@ -112,7 +131,7 @@ impl Request {
let mut parsed_info_hashes = Vec::with_capacity(info_hashes.len()); let mut parsed_info_hashes = Vec::with_capacity(info_hashes.len());
for info_hash in info_hashes { for info_hash in info_hashes {
parsed_info_hashes.push(InfoHash(deserialize_20_bytes(&info_hash)?)); parsed_info_hashes.push(InfoHash(deserialize_20_bytes(info_hash)?));
} }
let request = ScrapeRequest { let request = ScrapeRequest {
@ -123,34 +142,10 @@ impl Request {
} }
} }
fn parse_key_value_pairs<'a>(
info_hashes: &mut Vec<String>,
data: &mut HashMap<&'a str, String>,
query_string: &'a str,
) -> anyhow::Result<()> {
for part in query_string.split('&'){
let mut key_and_value = part.splitn(2, '=');
let key = key_and_value.next()
.with_context(|| format!("no key in {}", part))?;
let value = key_and_value.next()
.with_context(|| format!("no value in {}", part))?;
let value = Self::urldecode_memchr(value)?;
if key == "info_hash" {
info_hashes.push(value);
} else {
data.insert(key, value);
}
}
Ok(())
}
/// Seems to be somewhat faster than non-memchr version /// Seems to be somewhat faster than non-memchr version
fn parse_key_value_pairs_memchr<'a>( fn parse_key_value_pairs_memchr<'a>(
info_hashes: &mut Vec<String>, info_hashes: &mut Vec<SmartString<LazyCompact>>,
data: &mut HashMap<&'a str, String>, data: &mut HashMap<&'a str, SmartString<LazyCompact>>,
query_string: &'a str, query_string: &'a str,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let query_string_bytes = query_string.as_bytes(); let query_string_bytes = query_string.as_bytes();
@ -232,8 +227,8 @@ impl Request {
} }
/// Quite a bit faster than non-memchr version /// Quite a bit faster than non-memchr version
fn urldecode_memchr(value: &str) -> anyhow::Result<String> { fn urldecode_memchr(value: &str) -> anyhow::Result<SmartString<LazyCompact>> {
let mut processed = String::with_capacity(value.len()); let mut processed = SmartString::new();
let bytes = value.as_bytes(); let bytes = value.as_bytes();
let iter = ::memchr::memchr_iter(b'%', bytes); let iter = ::memchr::memchr_iter(b'%', bytes);
@ -266,8 +261,6 @@ impl Request {
processed.push_str(rest_of_str); processed.push_str(rest_of_str);
} }
processed.shrink_to_fit();
Ok(processed) Ok(processed)
} }
} }
@ -309,7 +302,7 @@ mod tests {
event: AnnounceEvent::Started, event: AnnounceEvent::Started,
compact: true, compact: true,
numwant: Some(0), numwant: Some(0),
key: Some("4ab4b877".to_string()) key: Some("4ab4b877".into())
}); });
assert_eq!(parsed_request, reference_request); assert_eq!(parsed_request, reference_request);

View file

@ -1,12 +1,13 @@
use std::net::{Ipv4Addr, Ipv6Addr}; use std::net::{Ipv4Addr, Ipv6Addr};
use serde::Serializer; use serde::Serializer;
use smartstring::{SmartString, LazyCompact};
use super::response::ResponsePeer; use super::response::ResponsePeer;
/// Not for serde /// Not for serde
pub fn deserialize_20_bytes(value: &str) -> anyhow::Result<[u8; 20]> { pub fn deserialize_20_bytes(value: SmartString<LazyCompact>) -> anyhow::Result<[u8; 20]> {
let mut arr = [0u8; 20]; let mut arr = [0u8; 20];
let mut char_iter = value.chars(); let mut char_iter = value.chars();

View file

@ -1 +1 @@
{"mean":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2813.0057190996495,"upper_bound":2835.0971444734805},"point_estimate":2823.5630570532094,"standard_error":5.60999731355646},"median":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2781.583086146614,"upper_bound":2798.7822350926263},"point_estimate":2790.3154531476994,"standard_error":4.287304236535639},"median_abs_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":80.18917995973909,"upper_bound":97.47337305052561},"point_estimate":88.82164587342986,"standard_error":4.385899559662111},"slope":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2805.0369153378406,"upper_bound":2827.3641695562396},"point_estimate":2815.621225429684,"standard_error":5.696158165859643},"std_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":138.70894509735032,"upper_bound":221.75787184542193},"point_estimate":177.57952341503912,"standard_error":21.529316188103856}} {"mean":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2144.1723727709154,"upper_bound":2153.112440385677},"point_estimate":2148.498471928281,"standard_error":2.2819363399119816},"median":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2136.1098212730085,"upper_bound":2142.6570382247664},"point_estimate":2139.175575113935,"standard_error":1.677012549554238},"median_abs_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":44.72773439758049,"upper_bound":52.312043449354604},"point_estimate":48.23289034620888,"standard_error":1.8876949157852096},"slope":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2148.408988674169,"upper_bound":2158.642726752668},"point_estimate":2153.014596603725,"standard_error":2.6164468826051843},"std_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":60.092727504812785,"upper_bound":85.59866236503464},"point_estimate":72.3824856801523,"standard_error":6.570166078942696}}

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
[2329.596058483846,2533.3581761736277,3076.7238233463795,3280.4859410361614] [1910.369982082539,2008.6428012110698,2270.7036522204853,2368.976471349016]