Merge pull request #188 from greatest-ape/work-2024-02-09

improve aquatic_bencher
This commit is contained in:
Joakim Frostegård 2024-02-10 18:39:15 +01:00 committed by GitHub
commit 616b43d731
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 114 additions and 158 deletions

40
TODO.md
View file

@@ -2,11 +2,6 @@
## High priority ## High priority
* aquatic_bencher
* bench aquatic_udp with io_uring too
* test with SubsequentOnePerPair
* include chihaya with higher core counts?
## Medium priority ## Medium priority
* stagger cleaning tasks? * stagger cleaning tasks?
@@ -23,14 +18,6 @@
* aquatic_ws * aquatic_ws
* Add cleaning task for ConnectionHandle.announced_info_hashes? * Add cleaning task for ConnectionHandle.announced_info_hashes?
* Performance hyperoptimization (receive interrupts on correct core)
* If there is no network card RSS support, do eBPF XDP CpuMap redirect based on packet info, to
cpus where socket workers run. Support is work in progress in the larger Rust eBPF
implementations, but exists in rebpf
* Pin socket workers
* Set SO_INCOMING_CPU (which should be fixed in very recent Linux?) to currently pinned thread
* How does this relate to (currently unused) so_attach_reuseport_cbpf code?
## Low priority ## Low priority
* aquatic_udp * aquatic_udp
@@ -39,9 +26,14 @@
* thiserror? * thiserror?
* CI * CI
* uring load test? * uring load test?
* load test
* move additional request sending to for each received response, maybe * Performance hyperoptimization (receive interrupts on correct core)
with probability 0.2 * If there is no network card RSS support, do eBPF XDP CpuMap redirect based on packet info, to
cpus where socket workers run. Support is work in progress in the larger Rust eBPF
implementations, but exists in rebpf
* Pin socket workers
* Set SO_INCOMING_CPU (which should be fixed in very recent Linux?) to currently pinned thread
* How does this relate to (currently unused) so_attach_reuseport_cbpf code?
# Not important # Not important
@@ -52,19 +44,3 @@
* scrape: does it work (serialization etc), and with multiple hashes? * scrape: does it work (serialization etc), and with multiple hashes?
* 'left' optional in magnet requests? Probably not. Transmission sends huge * 'left' optional in magnet requests? Probably not. Transmission sends huge
positive number. positive number.
# Don't do
* general: PGO didn't seem to help way back
## aquatic_http
* request from path:
* deserialize 20 bytes: possibly rewrite (just check length of underlying
bytes == 20 and then copy them), also maybe remove String from map for
these cases too. doesn't really improve performance
* crazy http parsing: check for newline with memchr, take slice until
there. then iter over space newlines/just take relevant data. Not faster
than httparse and a lot worse
## aquatic_udp_protocol
* Use `bytes` crate: seems to worsen performance somewhat

View file

@@ -19,7 +19,7 @@ default = ["udp"]
udp = ["aquatic_udp", "aquatic_udp_load_test"] udp = ["aquatic_udp", "aquatic_udp_load_test"]
[dependencies] [dependencies]
aquatic_udp = { optional = true, workspace = true } aquatic_udp = { optional = true, workspace = true, features = ["io-uring"] }
aquatic_udp_load_test = { optional = true, workspace = true } aquatic_udp_load_test = { optional = true, workspace = true }
anyhow = "1" anyhow = "1"

View file

@@ -1,6 +1,8 @@
# aquatic_bencher # aquatic_bencher
Automated benchmarking of aquatic and other BitTorrent trackers. Linux only. Automated benchmarking of aquatic and other BitTorrent trackers.
Requires Linux 6.0 or later.
## Supported trackers by protocol ## Supported trackers by protocol

View file

@@ -167,13 +167,21 @@ impl TryFrom<Range<usize>> for TaskSetCpuIndicator {
#[derive(Debug, Clone, Copy, clap::ValueEnum)] #[derive(Debug, Clone, Copy, clap::ValueEnum)]
pub enum CpuMode { pub enum CpuMode {
/// For 8 vCPU processor, use vCPU groups 0, 1, 2, 3, 4, 5, 6 and 7 /// Suitable for bare-metal machines without hyperthreads/SMT.
///
/// For 8 vCPU processor, uses vCPU groups 0, 1, 2, 3, 4, 5, 6 and 7
Subsequent, Subsequent,
/// For 8 vCPU processor, use vCPU groups 0 & 4, 1 & 5, 2 & 6 and 3 & 7 /// Suitable for bare-metal machines with hyperthreads/SMT.
///
/// For 8 vCPU processor, uses vCPU groups 0 & 4, 1 & 5, 2 & 6 and 3 & 7
SplitPairs, SplitPairs,
/// For 8 vCPU processor, use vCPU groups 0 & 1, 2 & 3, 4 & 5 and 6 & 7 /// For 8 vCPU processor, uses vCPU groups 0 & 1, 2 & 3, 4 & 5 and 6 & 7
SubsequentPairs, SubsequentPairs,
/// For 8 vCPU processor, use vCPU groups 0, 2, 4 and 6 /// Suitable for somewhat fairly comparing trackers on Hetzner virtual
/// machines. Since in-VM hyperthreads aren't really hyperthreads,
/// enabling them causes unpredictable performance.
///
/// For 8 vCPU processor, uses vCPU groups 0, 2, 4 and 6
SubsequentOnePerPair, SubsequentOnePerPair,
} }

View file

@@ -20,6 +20,7 @@ use crate::{
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UdpTracker { pub enum UdpTracker {
Aquatic, Aquatic,
AquaticIoUring,
OpenTracker, OpenTracker,
Chihaya, Chihaya,
} }
@@ -28,6 +29,7 @@ impl Tracker for UdpTracker {
fn name(&self) -> String { fn name(&self) -> String {
match self { match self {
Self::Aquatic => "aquatic_udp".into(), Self::Aquatic => "aquatic_udp".into(),
Self::AquaticIoUring => "aquatic_udp (io_uring)".into(),
Self::OpenTracker => "opentracker".into(), Self::OpenTracker => "opentracker".into(),
Self::Chihaya => "chihaya".into(), Self::Chihaya => "chihaya".into(),
} }
@@ -56,189 +58,144 @@ impl UdpCommand {
indexmap::indexmap! { indexmap::indexmap! {
1 => SetConfig { 1 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(1, 1, Priority::High),
AquaticUdpRunner::new(2, 1, Priority::High),
],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(0, Priority::Low), // Handle requests within event loop OpenTrackerUdpRunner::new(0, Priority::Medium), // Handle requests within event loop
OpenTrackerUdpRunner::new(1, Priority::Medium), OpenTrackerUdpRunner::new(1, Priority::High),
OpenTrackerUdpRunner::new(2, Priority::High),
], ],
UdpTracker::Chihaya => vec![ UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(), ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(1, Priority::High), (8, Priority::Medium),
(2, Priority::Medium), (12, Priority::High)
(4, Priority::Medium),
(6, Priority::Medium),
(8, Priority::High)
]), ]),
}, },
2 => SetConfig { 2 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(1, 1, Priority::Low), AquaticUdpRunner::with_mio(1, 1, Priority::Medium),
AquaticUdpRunner::new(2, 1, Priority::Medium), AquaticUdpRunner::with_mio(2, 1, Priority::High),
AquaticUdpRunner::new(3, 1, Priority::High), ],
UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::with_io_uring(1, 1, Priority::Medium),
AquaticUdpRunner::with_io_uring(2, 1, Priority::High),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(2, Priority::Medium), OpenTrackerUdpRunner::new(2, Priority::High),
OpenTrackerUdpRunner::new(4, Priority::High), OpenTrackerUdpRunner::new(4, Priority::Medium),
], ],
UdpTracker::Chihaya => vec![ UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(), ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(1, Priority::High), (8, Priority::Medium),
(2, Priority::Medium), (12, Priority::High),
(4, Priority::Medium),
(6, Priority::Medium),
(8, Priority::High)
]), ]),
}, },
4 => SetConfig { 4 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(3, 1, Priority::Low), AquaticUdpRunner::with_mio(3, 1, Priority::High),
AquaticUdpRunner::new(4, 1, Priority::Low), AquaticUdpRunner::with_mio(4, 1, Priority::Medium),
AquaticUdpRunner::new(5, 1, Priority::Medium), ],
AquaticUdpRunner::new(6, 1, Priority::Medium), UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::new(7, 1, Priority::High), AquaticUdpRunner::with_io_uring(3, 1, Priority::High),
AquaticUdpRunner::with_io_uring(4, 1, Priority::Medium),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(4, Priority::High), OpenTrackerUdpRunner::new(4, Priority::High),
OpenTrackerUdpRunner::new(8, Priority::Medium),
], ],
UdpTracker::Chihaya => vec![ UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(), ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(6, Priority::High),
(8, Priority::Medium), (8, Priority::Medium),
(12, Priority::High), (12, Priority::High),
(16, Priority::Medium)
]), ]),
}, },
6 => SetConfig { 6 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(5, 1, Priority::Medium), AquaticUdpRunner::with_mio(5, 1, Priority::High),
AquaticUdpRunner::new(6, 1, Priority::Medium), ],
AquaticUdpRunner::new(10, 1, Priority::Low), UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::with_io_uring(5, 1, Priority::High),
AquaticUdpRunner::new(4, 2, Priority::Low),
AquaticUdpRunner::new(6, 2, Priority::Medium),
AquaticUdpRunner::new(8, 2, Priority::High),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(6, Priority::High), OpenTrackerUdpRunner::new(6, Priority::High),
OpenTrackerUdpRunner::new(12, Priority::Medium), ],
UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(6, Priority::Medium),
(8, Priority::Medium), (8, Priority::Medium),
(12, Priority::High), (12, Priority::High),
(16, Priority::High),
(24, Priority::Medium),
]), ]),
}, },
8 => SetConfig { 8 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(7, 1, Priority::Medium), AquaticUdpRunner::with_mio(7, 1, Priority::High),
AquaticUdpRunner::new(8, 1, Priority::Medium), ],
AquaticUdpRunner::new(14, 1, Priority::Low), UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::new(6, 2, Priority::Low), AquaticUdpRunner::with_io_uring(7, 1, Priority::High),
AquaticUdpRunner::new(12, 2, Priority::High),
AquaticUdpRunner::new(5, 3, Priority::Low),
AquaticUdpRunner::new(10, 3, Priority::Medium),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(8, Priority::High), OpenTrackerUdpRunner::new(8, Priority::High),
OpenTrackerUdpRunner::new(16, Priority::Medium), ],
UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(8, Priority::High), (8, Priority::Medium),
(12, Priority::Medium), (12, Priority::High),
(16, Priority::High),
(24, Priority::Medium)
]), ]),
}, },
12 => SetConfig { 12 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(10, 2, Priority::Medium), AquaticUdpRunner::with_mio(10, 2, Priority::High),
AquaticUdpRunner::new(12, 2, Priority::Medium), AquaticUdpRunner::with_mio(9, 3, Priority::Medium),
AquaticUdpRunner::new(20, 2, Priority::Low), ],
UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::new(9, 3, Priority::Low), AquaticUdpRunner::with_io_uring(10, 2, Priority::High),
AquaticUdpRunner::new(12, 3, Priority::Medium), AquaticUdpRunner::with_io_uring(9, 3, Priority::Medium),
AquaticUdpRunner::new(18, 3, Priority::Low),
AquaticUdpRunner::new(8, 4, Priority::Low),
AquaticUdpRunner::new(12, 4, Priority::Medium),
AquaticUdpRunner::new(16, 4, Priority::High),
AquaticUdpRunner::new(7, 5, Priority::Low),
AquaticUdpRunner::new(12, 5, Priority::Medium),
AquaticUdpRunner::new(14, 5, Priority::Medium),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(12, Priority::High), OpenTrackerUdpRunner::new(12, Priority::High),
OpenTrackerUdpRunner::new(24, Priority::Medium), ],
UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(8, Priority::Medium), (8, Priority::Medium),
(12, Priority::Medium), (12, Priority::High),
(16, Priority::High),
(24, Priority::High),
]), ]),
}, },
16 => SetConfig { 16 => SetConfig {
implementations: indexmap! { implementations: indexmap! {
UdpTracker::Aquatic => vec![ UdpTracker::Aquatic => vec![
AquaticUdpRunner::new(14, 2, Priority::Low), AquaticUdpRunner::with_mio(13, 3, Priority::High),
AquaticUdpRunner::new(16, 2, Priority::Low), ],
AquaticUdpRunner::new(28, 2, Priority::Low), UdpTracker::AquaticIoUring => vec![
AquaticUdpRunner::with_io_uring(13, 3, Priority::High),
AquaticUdpRunner::new(13, 3, Priority::Low),
AquaticUdpRunner::new(16, 3, Priority::Low),
AquaticUdpRunner::new(26, 3, Priority::Low),
AquaticUdpRunner::new(12, 4, Priority::Medium),
AquaticUdpRunner::new(16, 4, Priority::Medium),
AquaticUdpRunner::new(24, 4, Priority::Low),
AquaticUdpRunner::new(11, 5, Priority::Low),
AquaticUdpRunner::new(16, 5, Priority::Medium),
AquaticUdpRunner::new(22, 5, Priority::Low),
AquaticUdpRunner::new(10, 6, Priority::Low),
AquaticUdpRunner::new(16, 6, Priority::High),
AquaticUdpRunner::new(20, 6, Priority::Medium),
AquaticUdpRunner::new(9, 7, Priority::Low),
AquaticUdpRunner::new(16, 7, Priority::Medium),
AquaticUdpRunner::new(18, 7, Priority::Low),
], ],
UdpTracker::OpenTracker => vec![ UdpTracker::OpenTracker => vec![
OpenTrackerUdpRunner::new(16, Priority::High), OpenTrackerUdpRunner::new(16, Priority::High),
OpenTrackerUdpRunner::new(32, Priority::Medium), ],
UdpTracker::Chihaya => vec![
ChihayaUdpRunner::new(),
], ],
}, },
load_test_runs: simple_load_test_runs(cpu_mode, &[ load_test_runs: simple_load_test_runs(cpu_mode, &[
(8, Priority::High), (8, Priority::High),
(12, Priority::High), (12, Priority::High),
(16, Priority::High),
(24, Priority::High),
]), ]),
}, },
} }
@@ -255,12 +212,12 @@ impl UdpCommand {
struct AquaticUdpRunner { struct AquaticUdpRunner {
socket_workers: usize, socket_workers: usize,
swarm_workers: usize, swarm_workers: usize,
use_io_uring: bool,
priority: Priority, priority: Priority,
} }
impl AquaticUdpRunner { impl AquaticUdpRunner {
#[allow(clippy::new_ret_no_self)] fn with_mio(
fn new(
socket_workers: usize, socket_workers: usize,
swarm_workers: usize, swarm_workers: usize,
priority: Priority, priority: Priority,
@@ -268,6 +225,19 @@ impl AquaticUdpRunner {
Rc::new(Self { Rc::new(Self {
socket_workers, socket_workers,
swarm_workers, swarm_workers,
use_io_uring: false,
priority,
})
}
fn with_io_uring(
socket_workers: usize,
swarm_workers: usize,
priority: Priority,
) -> Rc<dyn ProcessRunner<Command = UdpCommand>> {
Rc::new(Self {
socket_workers,
swarm_workers,
use_io_uring: true,
priority, priority,
}) })
} }
@@ -288,6 +258,7 @@ impl ProcessRunner for AquaticUdpRunner {
c.socket_workers = self.socket_workers; c.socket_workers = self.socket_workers;
c.swarm_workers = self.swarm_workers; c.swarm_workers = self.swarm_workers;
c.network.address = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 3000)); c.network.address = SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 3000));
c.network.use_io_uring = self.use_io_uring;
c.protocol.max_response_peers = 30; c.protocol.max_response_peers = 30;
let c = toml::to_string_pretty(&c)?; let c = toml::to_string_pretty(&c)?;

View file

@@ -100,6 +100,8 @@ pub struct NetworkConfig {
pub socket_recv_buffer_size: usize, pub socket_recv_buffer_size: usize,
/// Poll timeout in milliseconds (mio backend only) /// Poll timeout in milliseconds (mio backend only)
pub poll_timeout_ms: u64, pub poll_timeout_ms: u64,
#[cfg(feature = "io-uring")]
pub use_io_uring: bool,
/// Number of ring entries (io_uring backend only) /// Number of ring entries (io_uring backend only)
/// ///
/// Will be rounded to next power of two if not already one. /// Will be rounded to next power of two if not already one.
@@ -131,6 +133,8 @@ impl Default for NetworkConfig {
socket_recv_buffer_size: 8_000_000, socket_recv_buffer_size: 8_000_000,
poll_timeout_ms: 50, poll_timeout_ms: 50,
#[cfg(feature = "io-uring")] #[cfg(feature = "io-uring")]
use_io_uring: true,
#[cfg(feature = "io-uring")]
ring_size: 128, ring_size: 128,
resend_buffer_max_len: 0, resend_buffer_max_len: 0,
} }

View file

@@ -49,8 +49,9 @@ pub fn run_socket_worker(
priv_dropper: PrivilegeDropper, priv_dropper: PrivilegeDropper,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
#[cfg(all(target_os = "linux", feature = "io-uring"))] #[cfg(all(target_os = "linux", feature = "io-uring"))]
match self::uring::supported_on_current_kernel() { if config.network.use_io_uring {
Ok(()) => { self::uring::supported_on_current_kernel().context("check for io_uring compatibility")?;
return self::uring::SocketWorker::run( return self::uring::SocketWorker::run(
config, config,
shared_state, shared_state,
@@ -61,13 +62,6 @@ pub fn run_socket_worker(
priv_dropper, priv_dropper,
); );
} }
Err(err) => {
::log::warn!(
"Falling back to mio because of lacking kernel io_uring support: {:#}",
err
);
}
}
self::mio::SocketWorker::run( self::mio::SocketWorker::run(
config, config,

View file

@@ -5,13 +5,14 @@
sudo apt-get update && sudo apt-get upgrade -y sudo apt-get update && sudo apt-get upgrade -y
sudo apt-get install -y curl vim htop screen cmake build-essential pkg-config git screen cvs zlib1g zlib1g-dev golang sudo apt-get install -y curl vim htop screen cmake build-essential pkg-config git screen cvs zlib1g zlib1g-dev golang
sudo echo "deb http://deb.debian.org/debian bookworm-backports main contrib" >> /etc/apt/sources.list sudo echo "deb http://deb.debian.org/debian bookworm-backports main contrib" >> /etc/apt/sources.list
sudo apt-get update && sudo apt-get install linux-image-amd64/bookworm-backports sudo apt-get update && sudo apt-get install -y linux-image-amd64/bookworm-backports
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source "$HOME/.cargo/env" source "$HOME/.cargo/env"
# Build aquatic # Build aquatic
. ./scripts/env-native-cpu-without-avx-512 . ./scripts/env-native-cpu-without-avx-512
cargo build --profile "release-debug" -p aquatic_udp # export RUSTFLAGS="-C target-cpu=native"
cargo build --profile "release-debug" -p aquatic_udp --features "io-uring"
cargo build --profile "release-debug" -p aquatic_udp_load_test cargo build --profile "release-debug" -p aquatic_udp_load_test
cargo build --profile "release-debug" -p aquatic_bencher --features udp cargo build --profile "release-debug" -p aquatic_bencher --features udp
git log --oneline | head -n 1 git log --oneline | head -n 1