aquatic_ws: deserialize InMessage with simd-json for performance

InMessage deserialization went from about 18% to 13% of CPU time during a load test run.

Criterion benchmark of deserialization got about 90% improvement.
This commit is contained in:
Joakim Frostegård 2020-08-11 06:32:51 +02:00
parent 539cf03f93
commit 5d40954936
11 changed files with 1098 additions and 1031 deletions

72
Cargo.lock generated
View file

@ -65,7 +65,7 @@ dependencies = [
"aquatic_http_protocol", "aquatic_http_protocol",
"crossbeam-channel", "crossbeam-channel",
"either", "either",
"hashbrown", "hashbrown 0.8.1",
"indexmap", "indexmap",
"itoa", "itoa",
"log", "log",
@ -90,7 +90,7 @@ dependencies = [
"anyhow", "anyhow",
"aquatic_cli_helpers", "aquatic_cli_helpers",
"aquatic_http_protocol", "aquatic_http_protocol",
"hashbrown", "hashbrown 0.8.1",
"mimalloc", "mimalloc",
"mio", "mio",
"quickcheck", "quickcheck",
@ -107,7 +107,7 @@ dependencies = [
"anyhow", "anyhow",
"bendy", "bendy",
"criterion", "criterion",
"hashbrown", "hashbrown 0.8.1",
"hex", "hex",
"httparse", "httparse",
"itoa", "itoa",
@ -130,7 +130,7 @@ dependencies = [
"aquatic_common", "aquatic_common",
"aquatic_udp_protocol", "aquatic_udp_protocol",
"crossbeam-channel", "crossbeam-channel",
"hashbrown", "hashbrown 0.8.1",
"histogram", "histogram",
"indexmap", "indexmap",
"mimalloc", "mimalloc",
@ -168,7 +168,7 @@ dependencies = [
"aquatic_cli_helpers", "aquatic_cli_helpers",
"aquatic_udp_protocol", "aquatic_udp_protocol",
"crossbeam-channel", "crossbeam-channel",
"hashbrown", "hashbrown 0.8.1",
"mimalloc", "mimalloc",
"mio", "mio",
"parking_lot", "parking_lot",
@ -199,7 +199,7 @@ dependencies = [
"aquatic_ws_protocol", "aquatic_ws_protocol",
"crossbeam-channel", "crossbeam-channel",
"either", "either",
"hashbrown", "hashbrown 0.8.1",
"indexmap", "indexmap",
"log", "log",
"mimalloc", "mimalloc",
@ -222,7 +222,7 @@ dependencies = [
"anyhow", "anyhow",
"aquatic_cli_helpers", "aquatic_cli_helpers",
"aquatic_ws_protocol", "aquatic_ws_protocol",
"hashbrown", "hashbrown 0.8.1",
"mimalloc", "mimalloc",
"mio", "mio",
"quickcheck", "quickcheck",
@ -241,11 +241,12 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"criterion", "criterion",
"hashbrown", "hashbrown 0.8.1",
"quickcheck", "quickcheck",
"quickcheck_macros", "quickcheck_macros",
"serde", "serde",
"serde_json", "serde_json",
"simd-json",
"tungstenite", "tungstenite",
] ]
@ -681,6 +682,15 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
[[package]]
name = "float-cmp"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "499a1bfa03d254b047e7e5c1fc8dd23a8cf6b344a8eb7e622ae4bc76bfac8e68"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -754,6 +764,26 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177" checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177"
[[package]]
name = "halfbrown"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c12499524b5585419ab2f51545a19b842263a373580a83c0eb98a0142a260a10"
dependencies = [
"hashbrown 0.7.2",
"serde",
]
[[package]]
name = "hashbrown"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96282e96bfcd3da0d3aa9938bedf1e50df3269b6db08b4876d2da0bb1a0841cf"
dependencies = [
"ahash",
"autocfg",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.8.1" version = "0.8.1"
@ -827,7 +857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b88cd59ee5f71fea89a62248fc8f387d44400cefe05ef548466d61ced9029a7" checksum = "5b88cd59ee5f71fea89a62248fc8f387d44400cefe05ef548466d61ced9029a7"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"hashbrown", "hashbrown 0.8.1",
] ]
[[package]] [[package]]
@ -1602,6 +1632,18 @@ dependencies = [
"opaque-debug", "opaque-debug",
] ]
[[package]]
name = "simd-json"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0696059565e9aac60099f9ab388869551643fc2d04bf1535554e6ee157821a2f"
dependencies = [
"halfbrown",
"serde",
"serde_json",
"value-trait",
]
[[package]] [[package]]
name = "simplelog" name = "simplelog"
version = "0.8.0" version = "0.8.0"
@ -1849,6 +1891,18 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
[[package]]
name = "value-trait"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3384a4788567e35113300281d737ab1b77917bd35cb99ffd8a4283345da9a825"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.10" version = "0.2.10"

View file

@ -2,6 +2,7 @@
## General ## General
* rustflags for scripts in own .env file or similar
* automatic tests running real clients in container? * automatic tests running real clients in container?
## aquatic_http_load_test ## aquatic_http_load_test
@ -18,8 +19,6 @@
can distribute them to different workers) can distribute them to different workers)
## aquatic_http ## aquatic_http
* array buffer for EstablishedConnection.send_response, there is a lot of
allocating and deallocating now
* test torrent transfer with real clients * test torrent transfer with real clients
* test tls * test tls
* scrape: does it work (serialization etc), and with multiple hashes? * scrape: does it work (serialization etc), and with multiple hashes?
@ -27,6 +26,7 @@
positive number. positive number.
* compact=0 should result in error response * compact=0 should result in error response
* config: multiple request workers * config: multiple request workers
* actually delete old benchmark
## aquatic_ws_load_test ## aquatic_ws_load_test
* still maybe too few answers received with aquatic_ws * still maybe too few answers received with aquatic_ws
@ -38,8 +38,6 @@
## aquatic_ws ## aquatic_ws
* config: multiple request workers * config: multiple request workers
* create criterion benchmarks, then try out simd_json. deserializing InMessages
takes about 18% CPU, serializing OutMessages takes about 13.5% CPU
* test transfer again with changes made: * test transfer again with changes made:
* crossbeam-channel * crossbeam-channel
* ipv6/ipv4 mapping * ipv6/ipv4 mapping
@ -70,6 +68,8 @@
if that is the case since it means a panic occurred if that is the case since it means a panic occurred
## aquatic_http ## aquatic_http
* array buffer for EstablishedConnection.send_response? there is a lot of
allocating and deallocating now. Doesn't seem to help performance a lot.
* request parsing: * request parsing:
* smartstring: maybe use for keys? maybe use less? needs benchmarking * smartstring: maybe use for keys? maybe use less? needs benchmarking
* use fastrand instead of rand? (also for ws and udp then I guess because of * use fastrand instead of rand? (also for ws and udp then I guess because of

View file

@ -195,8 +195,8 @@ pub fn run_handshakes_and_read_messages(
use ::tungstenite::Error::Io; use ::tungstenite::Error::Io;
match established_ws.ws.read_message(){ match established_ws.ws.read_message(){
Ok(ws_message) => { Ok(mut ws_message) => {
if let Ok(in_message) = InMessage::from_ws_message(&ws_message){ if let Ok(in_message) = InMessage::from_ws_message(&mut ws_message){
let naive_peer_addr = established_ws.peer_addr; let naive_peer_addr = established_ws.peer_addr;
let converted_peer_ip = convert_ipv4_mapped_ipv6( let converted_peer_ip = convert_ipv4_mapped_ipv6(
naive_peer_addr.ip() naive_peer_addr.ip()

View file

@ -18,6 +18,7 @@ anyhow = "1"
hashbrown = { version = "0.8", features = ["serde"] } hashbrown = { version = "0.8", features = ["serde"] }
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde_json = "1" serde_json = "1"
simd-json = "0.3"
tungstenite = "0.11" tungstenite = "0.11"
[dev-dependencies] [dev-dependencies]

View file

@ -37,10 +37,10 @@ pub fn bench(c: &mut Criterion) {
offer_id: Some(OfferId(info_hash.0)) offer_id: Some(OfferId(info_hash.0))
}); });
let ws_message = request.to_ws_message(); let mut ws_message = request.to_ws_message();
c.bench_function("deserialize-announce-request", |b| b.iter(|| c.bench_function("deserialize-announce-request", |b| b.iter(||
InMessage::from_ws_message(black_box(&ws_message)) InMessage::from_ws_message(black_box(&mut ws_message))
)); ));
} }

View file

@ -258,16 +258,18 @@ pub enum InMessage {
impl InMessage { impl InMessage {
#[inline] #[inline]
pub fn from_ws_message(ws_message: &tungstenite::Message) -> ::anyhow::Result<Self> { pub fn from_ws_message(
use tungstenite::Message::{Text, Binary}; ws_message: &mut tungstenite::Message
) -> ::anyhow::Result<Self> {
use tungstenite::Message::Text;
let text = match ws_message { let text: &mut str = if let Text(text) = ws_message {
Text(text) => text, text
Binary(bytes) => ::std::str::from_utf8(bytes)?, } else {
_ => return Err(anyhow::anyhow!("Message is neither text nor bytes")), return Err(anyhow::anyhow!("Message is not text"));
}; };
::serde_json::from_str(text).context("serialize with serde") ::simd_json::serde::from_str(text).context("deserialize with serde")
} }
pub fn to_ws_message(&self) -> ::tungstenite::Message { pub fn to_ws_message(&self) -> ::tungstenite::Message {
@ -521,9 +523,9 @@ mod tests {
#[quickcheck] #[quickcheck]
fn quickcheck_serde_identity_in_message(in_message_1: InMessage) -> bool { fn quickcheck_serde_identity_in_message(in_message_1: InMessage) -> bool {
let ws_message = in_message_1.to_ws_message(); let mut ws_message = in_message_1.to_ws_message();
let in_message_2 = InMessage::from_ws_message(&ws_message).unwrap(); let in_message_2 = InMessage::from_ws_message(&mut ws_message).unwrap();
let success = in_message_1 == in_message_2; let success = in_message_1 == in_message_2;
@ -660,4 +662,4 @@ mod tests {
success success
} }
} }

View file

@ -1 +1 @@
{"mean":{"confidence_interval":{"confidence_level":0.95,"lower_bound":18703.904042751496,"upper_bound":18792.03205243071},"point_estimate":18746.072800173675,"standard_error":22.574908262590952},"median":{"confidence_interval":{"confidence_level":0.95,"lower_bound":18547.912025316455,"upper_bound":18605.846380105744},"point_estimate":18576.281397429004,"standard_error":15.686062550253332},"median_abs_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":306.28255537969375,"upper_bound":380.480602345445},"point_estimate":338.0236020241898,"standard_error":19.790518378924638},"slope":{"confidence_interval":{"confidence_level":0.95,"lower_bound":18658.020869749234,"upper_bound":18761.405070764362},"point_estimate":18706.576690779686,"standard_error":26.604673332450176},"std_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":553.3799052716993,"upper_bound":868.3032654487926},"point_estimate":715.2362082741504,"standard_error":80.39906836878366}} {"mean":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2094.926862805663,"upper_bound":2106.4267028892173},"point_estimate":2100.4015350320765,"standard_error":2.9392946950512737},"median":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2085.6324189161965,"upper_bound":2087.229197620325},"point_estimate":2086.413383946413,"standard_error":0.4169721177244358},"median_abs_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":16.490233538836364,"upper_bound":21.81096312849954},"point_estimate":18.680703664212437,"standard_error":1.3102508160911694},"slope":{"confidence_interval":{"confidence_level":0.95,"lower_bound":2085.6033212698335,"upper_bound":2094.8775402138917},"point_estimate":2089.9275901786054,"standard_error":2.361080413460781},"std_dev":{"confidence_interval":{"confidence_level":0.95,"lower_bound":73.7250326777399,"upper_bound":111.45831108639615},"point_estimate":93.04040248967267,"standard_error":9.710703224470429}}

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
[16630.874818319186,17501.98981446029,19824.96313750323,20696.078133644332] [1994.3859525202693,2034.7983478769509,2142.5647354947687,2182.97713085145]

View file

@ -1,7 +1,17 @@
#!/bin/sh #!/bin/sh
# Test in release mode to avoid quickcheck tests taking forever
# Compile with target-cpu=native but without AVX512 features, since they
# decrease performance.
DISABLE_AVX512=$(rustc --print target-features | grep " avx512" |
awk '{print $1}' | sed 's/^/-C target-feature=-/' | xargs)
export RUSTFLAGS="-C target-cpu=native $DISABLE_AVX512"
# Not chosen for exact values, only to be larger than defaults # Not chosen for exact values, only to be larger than defaults
export QUICKCHECK_TESTS=2000 export QUICKCHECK_TESTS=2000
export QUICKCHECK_GENERATOR_SIZE=1000 export QUICKCHECK_GENERATOR_SIZE=1000
cargo test cargo test --all