diff --git a/crates/udp/src/config.rs b/crates/udp/src/config.rs index 0d3e6bc..ee2ef45 100644 --- a/crates/udp/src/config.rs +++ b/crates/udp/src/config.rs @@ -183,6 +183,19 @@ pub struct StatisticsConfig { pub write_html_to_file: bool, /// Path to save HTML file to pub html_file_path: PathBuf, + /// Save statistics as JSON to a file + pub write_json_to_file: bool, + /// Path to dump JSON info-hash IPv4 to + pub json_info_hash_ipv4_file_path: PathBuf, + /// Path to dump JSON info-hash IPv6 to + pub json_info_hash_ipv6_file_path: PathBuf, + /// Save statistics as binary to a file + /// * this option is recommended for SSD as compares previous file before overwrite + pub write_bin_to_file: bool, + /// Path to dump binary info-hash IPv4 to + pub bin_info_hash_ipv4_file_path: PathBuf, + /// Path to dump binary info-hash IPv6 to + pub bin_info_hash_ipv6_file_path: PathBuf, /// Run a prometheus endpoint #[cfg(feature = "prometheus")] pub run_prometheus_endpoint: bool, @@ -204,14 +217,18 @@ impl StatisticsConfig { if #[cfg(feature = "prometheus")] { pub fn active(&self) -> bool { (self.interval != 0) & - (self.print_to_stdout | self.write_html_to_file | self.run_prometheus_endpoint) + (self.print_to_stdout | self.write_html_to_file | self.write_json_to_file | self.write_bin_to_file | self.run_prometheus_endpoint) } } else { pub fn active(&self) -> bool { - (self.interval != 0) & (self.print_to_stdout | self.write_html_to_file) + (self.interval != 0) & (self.print_to_stdout | self.write_html_to_file | self.write_json_to_file | self.write_bin_to_file) } } } + /// Skip info-hash collection if not required by the configuration + pub fn collect_info_hash(&self) -> bool { + (self.interval != 0) & (self.write_json_to_file | self.write_bin_to_file) + } } impl Default for StatisticsConfig { @@ -223,6 +240,12 @@ impl Default for StatisticsConfig { print_to_stdout: false, write_html_to_file: false, html_file_path: "tmp/statistics.html".into(), + write_json_to_file: false, + json_info_hash_ipv4_file_path: "tmp/info_hash_v4.json".into(), + json_info_hash_ipv6_file_path: "tmp/info_hash_v6.json".into(), + write_bin_to_file: false, + bin_info_hash_ipv4_file_path: "tmp/info_hash_v4.bin".into(), + bin_info_hash_ipv6_file_path: "tmp/info_hash_v6.bin".into(), #[cfg(feature = "prometheus")] run_prometheus_endpoint: false, #[cfg(feature = "prometheus")] diff --git a/crates/udp/src/swarm.rs b/crates/udp/src/swarm.rs index 2d422f9..cd0a51d 100644 --- a/crates/udp/src/swarm.rs +++ b/crates/udp/src/swarm.rs @@ -1,3 +1,4 @@ +use std::io::Write; use std::iter::repeat_with; use std::net::IpAddr; use std::ops::DerefMut; @@ -133,6 +134,95 @@ impl TorrentMaps { ::log::error!("couldn't send statistics message: {:#}", err); } } + + if config.statistics.write_json_to_file { + use anyhow::{Context, Result}; + fn save_to_file( + path: &std::path::PathBuf, + info_hashes: &Vec, + ) -> Result<()> { + let mut f = Context::with_context(std::fs::File::create(path), || { + format!("File path: {}", path.to_string_lossy()) + })?; + write!(f, "[")?; + if !info_hashes.is_empty() { + write!(f, "\"{}\"", info_hashes[0])?; + if let Some(i) = info_hashes.get(1..) { + for info_hash in i { + write!(f, ",\"{info_hash}\"")?; + } + } + } + write!(f, "]")?; + Ok(()) + } + if config.network.ipv4_active() { + if let Err(err) = + save_to_file(&config.statistics.json_info_hash_ipv4_file_path, &ipv4.3) + { + ::log::error!("Couldn't dump IPv4 info-hash table to file: {:#}", err) + } + } + if config.network.ipv6_active() { + if let Err(err) = + save_to_file(&config.statistics.json_info_hash_ipv6_file_path, &ipv6.3) + { + ::log::error!("Couldn't dump IPv6 info-hash table to file: {:#}", err) + } + } + } + + if config.statistics.write_bin_to_file { + use anyhow::{Context, Result}; + use std::{fs::File, io::Read, path::PathBuf}; + /// Prevent extra write operations by compare the file content is up to date + fn is_same(path: &PathBuf, info_hashes: &Vec) -> Result { + if !std::fs::exists(path)? { + return Ok(false); + } + const L: usize = 20; // v1 only + let mut t = 0; + let mut f = File::open(path)?; + loop { + let mut b = [0; L]; + if f.read(&mut b)? != L { + break; + } + if !info_hashes.iter().any(|i| i.0 == b) { + return Ok(false); + } + t += 1 + } + Ok(t == info_hashes.len()) + } + /// Dump `InfoHash` index to file + fn save_to_file(path: &PathBuf, info_hashes: &Vec) -> Result<()> { + if is_same(path, info_hashes)? { + return Ok(()); + } + let mut f = Context::with_context(File::create(path), || { + format!("File path: {}", path.to_string_lossy()) + })?; + for i in info_hashes { + f.write_all(&i.0)? + } + Ok(()) + } + if config.network.ipv4_active() { + if let Err(err) = + save_to_file(&config.statistics.bin_info_hash_ipv4_file_path, &ipv4.3) + { + ::log::error!("Couldn't dump IPv4 info-hash table to file: {:#}", err) + } + } + if config.network.ipv6_active() { + if let Err(err) = + save_to_file(&config.statistics.bin_info_hash_ipv6_file_path, &ipv6.3) + { + ::log::error!("Couldn't dump IPv6 info-hash table to file: {:#}", err) + } + } + } } } } @@ -219,9 +309,10 @@ impl TorrentMapShards { access_list_cache: &mut AccessListCache, access_list_mode: AccessListMode, now: SecondsSinceServerStart, - ) -> (usize, usize, Option>) { + ) -> (usize, usize, Option>, Vec) { let mut total_num_torrents = 0; let mut total_num_peers = 0; + let mut info_hashes: Vec = Vec::new(); let mut opt_histogram: Option> = config .statistics @@ -297,9 +388,21 @@ impl TorrentMapShards { torrent_map_shard.shrink_to_fit(); total_num_torrents += torrent_map_shard.len(); + + if config.statistics.collect_info_hash() { + info_hashes.reserve(total_num_torrents); + for (k, _) in torrent_map_shard.iter() { + info_hashes.push(*k) + } + } } - (total_num_torrents, total_num_peers, opt_histogram) + ( + total_num_torrents, + total_num_peers, + opt_histogram, + info_hashes, + ) } fn get_shard(&self, info_hash: &InfoHash) -> &RwLock> { diff --git a/crates/udp_protocol/src/common.rs b/crates/udp_protocol/src/common.rs index 01e5b1c..11b875a 100644 --- a/crates/udp_protocol/src/common.rs +++ b/crates/udp_protocol/src/common.rs @@ -275,6 +275,19 @@ pub fn invalid_data() -> ::std::io::Error { ::std::io::Error::new(::std::io::ErrorKind::InvalidData, "invalid data") } +impl std::fmt::Display for InfoHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + self.0 + .iter() + .map(|b| format!("{b:02x}")) + .collect::() + ) + } +} + #[cfg(test)] impl quickcheck::Arbitrary for InfoHash { fn arbitrary(g: &mut quickcheck::Gen) -> Self {