From e6b37c1c5fc2beb001111289b0ab9a4d56c6478a Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 3 Jun 2025 13:02:44 +0300 Subject: [PATCH 01/12] implement `Display` trait for `InfoHash` #226 --- crates/udp_protocol/src/common.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/udp_protocol/src/common.rs b/crates/udp_protocol/src/common.rs index 01e5b1c..7842657 100644 --- a/crates/udp_protocol/src/common.rs +++ b/crates/udp_protocol/src/common.rs @@ -275,6 +275,19 @@ pub fn invalid_data() -> ::std::io::Error { ::std::io::Error::new(::std::io::ErrorKind::InvalidData, "invalid data") } +impl std::fmt::Display for InfoHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + self.0 + .iter() + .map(|b| format!("{:02x}", b)) + .collect::() + ) + } +} + #[cfg(test)] impl quickcheck::Arbitrary for InfoHash { fn arbitrary(g: &mut quickcheck::Gen) -> Self { From 4b473a63bc36fee8e40f71737de07550f0bda07e Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 3 Jun 2025 14:50:32 +0300 Subject: [PATCH 02/12] implement info-hash JSON/API #226 --- crates/udp/src/config.rs | 9 +++++++ crates/udp/src/swarm.rs | 55 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/crates/udp/src/config.rs b/crates/udp/src/config.rs index 0d3e6bc..3851b31 100644 --- a/crates/udp/src/config.rs +++ b/crates/udp/src/config.rs @@ -183,6 +183,12 @@ pub struct StatisticsConfig { pub write_html_to_file: bool, /// Path to save HTML file to pub html_file_path: PathBuf, + /// Save statistics as JSON to a file + pub write_json_to_file: bool, + /// Path to dump JSON info-hash IPv4 to + pub json_info_hash_ipv4_file_path: PathBuf, + /// Path to dump JSON info-hash IPv6 to + pub json_info_hash_ipv6_file_path: PathBuf, /// Run a prometheus endpoint #[cfg(feature = "prometheus")] pub run_prometheus_endpoint: bool, @@ -223,6 +229,9 @@ impl Default for StatisticsConfig { print_to_stdout: false, write_html_to_file: false, html_file_path: "tmp/statistics.html".into(), + write_json_to_file: false, + json_info_hash_ipv4_file_path: "tmp/info_hash_v4.json".into(), + json_info_hash_ipv6_file_path: "tmp/info_hash_v6.json".into(), #[cfg(feature = "prometheus")] run_prometheus_endpoint: false, #[cfg(feature = "prometheus")] diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 2d422f9..1496540 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -1,3 +1,4 @@ +use std::io::Write; use std::iter::repeat_with; use std::net::IpAddr; use std::ops::DerefMut; @@ -133,6 +134,43 @@ impl TorrentMaps { ::log::error!("couldn't send statistics message: {:#}", err); } } + + if config.statistics.write_json_to_file { + fn save_to_file( + path: &std::path::PathBuf, + info_hashes: &Vec, + ) -> anyhow::Result<()> { + let mut file = + anyhow::Context::with_context(std::fs::File::create(path), || { + format!("File path: {}", path.to_string_lossy()) + })?; + write!(file, "[")?; + if !info_hashes.is_empty() { + write!(file, "\"{}\"", info_hashes[0])?; + if let Some(i) = info_hashes.get(1..) { + for info_hash in i { + write!(file, ",\"{info_hash}\"")?; + } + } + } + write!(file, "]")?; // @TODO serialize with serde_json? + Ok(()) + } + if config.network.ipv4_active() { + if let Err(err) = + save_to_file(&config.statistics.json_info_hash_ipv4_file_path, &ipv4.3) + { + ::log::error!("Couldn't dump IPv4 info-hash table to file: {:#}", err) + } + } + if config.network.ipv6_active() { + if let Err(err) = + save_to_file(&config.statistics.json_info_hash_ipv6_file_path, &ipv6.3) + { + ::log::error!("Couldn't dump IPv6 info-hash table to file: {:#}", err) + } + } + } } } } @@ -219,9 +257,10 @@ impl TorrentMapShards { access_list_cache: &mut AccessListCache, access_list_mode: AccessListMode, now: SecondsSinceServerStart, - ) -> (usize, usize, Option>) { + ) -> (usize, usize, Option>, Vec) { let mut total_num_torrents = 0; let mut total_num_peers = 0; + let mut info_hashes: Vec = Vec::new(); let mut opt_histogram: Option> = config .statistics @@ -297,9 +336,21 @@ impl TorrentMapShards { torrent_map_shard.shrink_to_fit(); total_num_torrents += torrent_map_shard.len(); + + if config.statistics.write_json_to_file { + info_hashes.reserve(total_num_torrents); + for (k, _) in torrent_map_shard.iter() { + info_hashes.push(*k) + } + } } - (total_num_torrents, total_num_peers, opt_histogram) + ( + total_num_torrents, + total_num_peers, + opt_histogram, + info_hashes, + ) } fn get_shard(&self, info_hash: &InfoHash) -> &RwLock> { From 7d00f0750f50c1c45a71f24f2d04a9584fb358d6 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 01:41:06 +0300 Subject: [PATCH 03/12] implement binary info-hash api --- crates/udp/src/config.rs | 10 +++++++ crates/udp/src/swarm.rs | 65 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/crates/udp/src/config.rs b/crates/udp/src/config.rs index 3851b31..46c5e38 100644 --- a/crates/udp/src/config.rs +++ b/crates/udp/src/config.rs @@ -189,6 +189,13 @@ pub struct StatisticsConfig { pub json_info_hash_ipv4_file_path: PathBuf, /// Path to dump JSON info-hash IPv6 to pub json_info_hash_ipv6_file_path: PathBuf, + /// Save statistics as binary to a file + /// * this option is recommended for SSD as compares previous file before overwrite + pub write_bin_to_file: bool, + /// Path to dump binary info-hash IPv4 to + pub bin_info_hash_ipv4_file_path: PathBuf, + /// Path to dump binary info-hash IPv6 to + pub bin_info_hash_ipv6_file_path: PathBuf, /// Run a prometheus endpoint #[cfg(feature = "prometheus")] pub run_prometheus_endpoint: bool, @@ -232,6 +239,9 @@ impl Default for StatisticsConfig { write_json_to_file: false, json_info_hash_ipv4_file_path: "tmp/info_hash_v4.json".into(), json_info_hash_ipv6_file_path: "tmp/info_hash_v6.json".into(), + write_bin_to_file: false, + bin_info_hash_ipv4_file_path: "tmp/info_hash_v4.bin".into(), + bin_info_hash_ipv6_file_path: "tmp/info_hash_v6.bin".into(), #[cfg(feature = "prometheus")] run_prometheus_endpoint: false, #[cfg(feature = "prometheus")] diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 1496540..d78a7b2 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -136,14 +136,14 @@ impl TorrentMaps { } if config.statistics.write_json_to_file { + use anyhow::{Context, Result}; fn save_to_file( path: &std::path::PathBuf, info_hashes: &Vec, - ) -> anyhow::Result<()> { - let mut file = - anyhow::Context::with_context(std::fs::File::create(path), || { - format!("File path: {}", path.to_string_lossy()) - })?; + ) -> Result<()> { + let mut file = Context::with_context(std::fs::File::create(path), || { + format!("File path: {}", path.to_string_lossy()) + })?; write!(file, "[")?; if !info_hashes.is_empty() { write!(file, "\"{}\"", info_hashes[0])?; @@ -153,7 +153,7 @@ impl TorrentMaps { } } } - write!(file, "]")?; // @TODO serialize with serde_json? + write!(file, "]")?; Ok(()) } if config.network.ipv4_active() { @@ -171,6 +171,59 @@ impl TorrentMaps { } } } + + if config.statistics.write_bin_to_file { + use anyhow::{Context, Result}; + use std::{fs::File, io::Read, path::PathBuf}; + /// Prevent extra write operations by compare the file content is same + fn is_same(path: &PathBuf, info_hashes: &Vec) -> Result { + if !std::fs::exists(path)? { + return Ok(false); + } + let mut t = 0; + let mut f = File::open(path)?; + loop { + let mut b = vec![0, 20]; + let n = f.read_to_end(&mut b)?; + if n == 0 { + break; + } + if info_hashes.iter().any(|i| i.0 != b[..n]) { + return Ok(false); + } + t += 1 + } + Ok(t == info_hashes.len()) + } + /// Dump `InfoHash` index to file + fn save_to_file(path: &PathBuf, info_hashes: &Vec) -> Result<()> { + if is_same(path, info_hashes)? { + return Ok(()); + } + let mut f = Context::with_context(File::create(path), || { + format!("File path: {}", path.to_string_lossy()) + })?; + for i in info_hashes { + f.write_all(&i.0)?; + f.write_all(b"\n")? + } + Ok(()) + } + if config.network.ipv4_active() { + if let Err(err) = + save_to_file(&config.statistics.bin_info_hash_ipv4_file_path, &ipv4.3) + { + ::log::error!("Couldn't dump IPv4 info-hash table to file: {:#}", err) + } + } + if config.network.ipv6_active() { + if let Err(err) = + save_to_file(&config.statistics.bin_info_hash_ipv6_file_path, &ipv6.3) + { + ::log::error!("Couldn't dump IPv6 info-hash table to file: {:#}", err) + } + } + } } } } From 7a62417d189239ad1fafa057dbef4927fe2b8299 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 01:54:47 +0300 Subject: [PATCH 04/12] fix vector init --- crates/udp/src/swarm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index d78a7b2..72b5cdf 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -183,7 +183,7 @@ impl TorrentMaps { let mut t = 0; let mut f = File::open(path)?; loop { - let mut b = vec![0, 20]; + let mut b = vec![0; 20]; let n = f.read_to_end(&mut b)?; if n == 0 { break; From e51c1dc7b167eecc2927a13a32684d5374626ec1 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 14:04:19 +0300 Subject: [PATCH 05/12] remove separator byte from the binary api format --- crates/udp/src/swarm.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 72b5cdf..f568da1 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -175,17 +175,18 @@ impl TorrentMaps { if config.statistics.write_bin_to_file { use anyhow::{Context, Result}; use std::{fs::File, io::Read, path::PathBuf}; - /// Prevent extra write operations by compare the file content is same + /// Prevent extra write operations by compare the file content is up to date fn is_same(path: &PathBuf, info_hashes: &Vec) -> Result { if !std::fs::exists(path)? { return Ok(false); } + const L: usize = 20; // v1 only let mut t = 0; let mut f = File::open(path)?; loop { - let mut b = vec![0; 20]; - let n = f.read_to_end(&mut b)?; - if n == 0 { + let mut b = vec![0; L]; + let n = f.read(&mut b)?; + if n != L { break; } if info_hashes.iter().any(|i| i.0 != b[..n]) { @@ -204,8 +205,7 @@ impl TorrentMaps { format!("File path: {}", path.to_string_lossy()) })?; for i in info_hashes { - f.write_all(&i.0)?; - f.write_all(b"\n")? + f.write_all(&i.0)? } Ok(()) } From 8a3d05fa4349b7be345c51566d2cb6f0e6e411bf Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 14:08:31 +0300 Subject: [PATCH 06/12] rename len variable --- crates/udp/src/swarm.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index f568da1..a3f1b6e 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -185,11 +185,11 @@ impl TorrentMaps { let mut f = File::open(path)?; loop { let mut b = vec![0; L]; - let n = f.read(&mut b)?; - if n != L { + let l = f.read(&mut b)?; + if l != L { break; } - if info_hashes.iter().any(|i| i.0 != b[..n]) { + if info_hashes.iter().any(|i| i.0 != b[..l]) { return Ok(false); } t += 1 From 3377baad75ba9c5da95b8a0bf8faa19817f1cb2d Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 14:43:29 +0300 Subject: [PATCH 07/12] update statistic is active condition --- crates/udp/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/udp/src/config.rs b/crates/udp/src/config.rs index 46c5e38..81580bf 100644 --- a/crates/udp/src/config.rs +++ b/crates/udp/src/config.rs @@ -217,11 +217,11 @@ impl StatisticsConfig { if #[cfg(feature = "prometheus")] { pub fn active(&self) -> bool { (self.interval != 0) & - (self.print_to_stdout | self.write_html_to_file | self.run_prometheus_endpoint) + (self.print_to_stdout | self.write_html_to_file | self.write_json_to_file | self.write_bin_to_file | self.run_prometheus_endpoint) } } else { pub fn active(&self) -> bool { - (self.interval != 0) & (self.print_to_stdout | self.write_html_to_file) + (self.interval != 0) & (self.print_to_stdout | self.write_html_to_file | self.write_json_to_file | self.write_bin_to_file) } } } From 8a58dfdf132015fd55c173c385d59d9e34877c33 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 14:44:56 +0300 Subject: [PATCH 08/12] fix up to date match condition --- crates/udp/src/swarm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index a3f1b6e..4e4ac13 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -189,7 +189,7 @@ impl TorrentMaps { if l != L { break; } - if info_hashes.iter().any(|i| i.0 != b[..l]) { + if !info_hashes.iter().any(|i| i.0 == b[..l]) { return Ok(false); } t += 1 From 67de3fc2135bcb8ddfce25da56e2e7067e7da0cb Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 15:07:18 +0300 Subject: [PATCH 09/12] apply clippy optimization --- crates/udp_protocol/src/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/udp_protocol/src/common.rs b/crates/udp_protocol/src/common.rs index 7842657..11b875a 100644 --- a/crates/udp_protocol/src/common.rs +++ b/crates/udp_protocol/src/common.rs @@ -282,7 +282,7 @@ impl std::fmt::Display for InfoHash { "{}", self.0 .iter() - .map(|b| format!("{:02x}", b)) + .map(|b| format!("{b:02x}")) .collect::() ) } From 4ec82dc94532e6ce4f5fea8319831be5121fc4ed Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 16:09:32 +0300 Subject: [PATCH 10/12] minor optimizations: remove extra vector wrap --- crates/udp/src/swarm.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 4e4ac13..4eb04ad 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -184,12 +184,11 @@ impl TorrentMaps { let mut t = 0; let mut f = File::open(path)?; loop { - let mut b = vec![0; L]; - let l = f.read(&mut b)?; - if l != L { + let mut b = [0; L]; + if f.read(&mut b)? != L { break; } - if !info_hashes.iter().any(|i| i.0 == b[..l]) { + if !info_hashes.iter().any(|i| i.0 == b) { return Ok(false); } t += 1 From 3fa126ffa58988860d22ccfd66ad5dac56e7d930 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 19:09:36 +0300 Subject: [PATCH 11/12] use short local var name --- crates/udp/src/swarm.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 4eb04ad..b255cbe 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -141,19 +141,19 @@ impl TorrentMaps { path: &std::path::PathBuf, info_hashes: &Vec, ) -> Result<()> { - let mut file = Context::with_context(std::fs::File::create(path), || { + let mut f = Context::with_context(std::fs::File::create(path), || { format!("File path: {}", path.to_string_lossy()) })?; - write!(file, "[")?; + write!(f, "[")?; if !info_hashes.is_empty() { - write!(file, "\"{}\"", info_hashes[0])?; + write!(f, "\"{}\"", info_hashes[0])?; if let Some(i) = info_hashes.get(1..) { for info_hash in i { - write!(file, ",\"{info_hash}\"")?; + write!(f, ",\"{info_hash}\"")?; } } } - write!(file, "]")?; + write!(f, "]")?; Ok(()) } if config.network.ipv4_active() { From e238db44398e0267a158276e473eeed18dee6197 Mon Sep 17 00:00:00 2001 From: yggverse Date: Tue, 8 Jul 2025 21:07:30 +0300 Subject: [PATCH 12/12] add missed condition to collect an additional data, implement as shared config function --- crates/udp/src/config.rs | 4 ++++ crates/udp/src/swarm.rs | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/udp/src/config.rs b/crates/udp/src/config.rs index 81580bf..ee2ef45 100644 --- a/crates/udp/src/config.rs +++ b/crates/udp/src/config.rs @@ -225,6 +225,10 @@ impl StatisticsConfig { } } } + /// Skip info-hash collection if not required by the configuration + pub fn collect_info_hash(&self) -> bool { + (self.interval != 0) & (self.write_json_to_file | self.write_bin_to_file) + } } impl Default for StatisticsConfig { diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index b255cbe..cd0a51d 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -389,7 +389,7 @@ impl TorrentMapShards { total_num_torrents += torrent_map_shard.len(); - if config.statistics.write_json_to_file { + if config.statistics.collect_info_hash() { info_hashes.reserve(total_num_torrents); for (k, _) in torrent_map_shard.iter() { info_hashes.push(*k)