From 00e8f8078bb49cbae9550c2fac58109cca705a1c Mon Sep 17 00:00:00 2001
From: yggverse
Date: Thu, 7 Aug 2025 04:19:43 +0300
Subject: [PATCH] draft multi-stack udp scraper implementation

---
 Cargo.toml         |   1 +
 README.md          |   4 +-
 src/config.rs      |  19 ++++++++-
 src/main.rs        |  27 +++++++-----
 src/scraper.rs     |  38 +++++++++++++++++
 src/scraper/udp.rs | 104 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 180 insertions(+), 13 deletions(-)
 create mode 100644 src/scraper.rs
 create mode 100644 src/scraper/udp.rs

diff --git a/Cargo.toml b/Cargo.toml
index 11cd126..0fd9d8b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,3 +19,4 @@ url = { version = "2.5", features = ["serde"] }
 urlencoding = "2.1"
 rocket_dyn_templates = { version = "0.2", features = ["tera"] }
 plurify = "0.2"
+rand = "0.9"
diff --git a/README.md b/README.md
index 05d33a7..dac6e54 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,9 @@
   * [ ] Files list
   * [ ] Background image (from the files asset)
 * [ ] Common features
-  * [ ] Scrape peers/seeds
+  * [ ] Scrape peers/seeders/leechers
+    * [x] UDP
+    * [ ] TCP
   * [ ] Download
     * [x] Magnet
     * [ ] Torrent
diff --git a/src/config.rs b/src/config.rs
index dadb181..cf57885 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,6 +1,6 @@
 use clap::Parser;
 use std::{
-    net::{IpAddr, Ipv4Addr},
+    net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6},
     path::PathBuf,
 };
 use url::Url;
@@ -53,4 +53,21 @@ pub struct Config {
     /// Bind server on given port
     #[arg(long, short, default_value_t = 8000)]
     pub port: u16,
+
+    /// Bind local UDP client socket(s) for `scrape_udp_server` requests
+    ///
+    /// * unused if `scrape_udp_server` is not set
+    #[arg(long, default_values_t = vec![
+        SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0)),
+        SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, 0, 0, 0))
+    ])]
+    pub scrape_udp_client: Vec<SocketAddr>,
+
+    /// Scrape the given UDP trackers for peers/seeders/leechers info
+    #[arg(long)]
+    pub scrape_udp_server: Option<Vec<SocketAddr>>,
+
+    /// Run the instance in debug mode
+    #[arg(long, default_value_t = false)]
+    pub debug: bool,
 }
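With the new fields, clap derives `--scrape-udp-client`, `--scrape-udp-server`, and `--debug` from the names above, and `scrape_udp_server` parses plain `SocketAddr` values, so trackers have to be given as already-resolved IP:port pairs. A hypothetical invocation (tracker addresses are placeholders):

    cargo run -- \
        --scrape-udp-server 203.0.113.1:6969 \
        --scrape-udp-server '[2001:db8::1]:6969' \
        --debug

The two `scrape_udp_client` defaults bind one ephemeral-port socket per stack (`0.0.0.0:0` and `[::]:0`), which is what lets the scraper reach both IPv4 and IPv6 trackers.
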
diff --git a/src/main.rs b/src/main.rs
index 9ea2737..64f7bb3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ extern crate rocket;
 mod config;
 mod feed;
 mod format;
+mod scraper;
 mod storage;
 
 use config::Config;
@@ -15,6 +16,7 @@ use rocket::{
     serde::Serialize,
 };
 use rocket_dyn_templates::{Template, context};
+use scraper::{Scrape, Scraper};
 use storage::{Order, Sort, Storage, Torrent};
 use url::Url;
 
@@ -33,19 +35,13 @@ pub struct Meta {
 #[get("/?<page>")]
 fn index(
     page: Option<usize>,
+    scraper: &State<Scraper>,
     storage: &State<Storage>,
     meta: &State<Meta>,
 ) -> Result<Template, Custom<String>> {
     use plurify::Plurify;
     #[derive(Serialize)]
     #[serde(crate = "rocket::serde")]
-    struct Scrape {
-        leechers: usize,
-        peers: usize,
-        seeders: usize,
-    }
-    #[derive(Serialize)]
-    #[serde(crate = "rocket::serde")]
     struct Row {
         created: Option<String>,
         files: String,
@@ -77,7 +73,7 @@ fn index(
                 .map(|t| t.format(&meta.format_time).to_string()),
             indexed: torrent.time.format(&meta.format_time).to_string(),
             magnet: format::magnet(&torrent.info_hash, meta.trackers.as_ref()),
-            scrape: None, // @TODO
+            scrape: scraper.scrape(torrent.info_hash.as_bytes()),
             size: format::bytes(torrent.size),
             files: torrent.files.as_ref().map_or("1 file".into(), |f| {
                 let l = f.len();
@@ -116,7 +112,6 @@ fn rss(feed: &State<Feed>, storage: &State<Storage>) -> Result<Template, C
 #[launch]
 fn rocket() -> _ {
     use clap::Parser;
-    use rocket::fs::FileServer;
     let config = Config::parse();
     let feed = Feed::init(
         config.title.clone(),
@@ -124,15 +119,25 @@ fn rocket() -> _ {
         config.canonical_url.clone(),
         config.tracker.clone(),
     );
+    let scraper = Scraper::init(
+        config
+            .scrape_udp_server
+            .map(|s| (config.scrape_udp_client, s)),
+    );
     let storage = Storage::init(config.preload, config.list_limit, config.capacity).unwrap(); // @TODO handle
     rocket::build()
         .attach(Template::fairing())
         .configure(rocket::Config {
             port: config.port,
             address: config.host,
-            ..rocket::Config::default()
+            ..if config.debug {
+                rocket::Config::debug_default()
+            } else {
+                rocket::Config::default()
+            }
         })
         .manage(feed)
+        .manage(scraper)
         .manage(storage)
         .manage(Meta {
             canonical: config.canonical_url,
@@ -142,6 +147,6 @@ fn rocket() -> _ {
         trackers: config.tracker,
         version: env!("CARGO_PKG_VERSION").into(),
     })
-    .mount("/", FileServer::from(config.statics))
+    .mount("/", rocket::fs::FileServer::from(config.statics))
     .mount("/", routes![index, rss])
 }
diff --git a/src/scraper.rs b/src/scraper.rs
new file mode 100644
index 0000000..26486a9
--- /dev/null
+++ b/src/scraper.rs
@@ -0,0 +1,38 @@
+mod udp;
+
+use rocket::serde::Serialize;
+use std::net::SocketAddr;
+use udp::Udp;
+
+#[derive(Serialize, Default)]
+#[serde(crate = "rocket::serde")]
+pub struct Scrape {
+    pub leechers: u32,
+    pub peers: u32,
+    pub seeders: u32,
+}
+
+pub struct Scraper {
+    udp: Option<Udp>,
+    // tcp: @TODO
+}
+
+impl Scraper {
+    pub fn init(udp: Option<(Vec<SocketAddr>, Vec<SocketAddr>)>) -> Self {
+        Self {
+            udp: udp.map(|(local, remote)| Udp::init(local, remote)),
+        }
+    }
+
+    /// Sum the counters from every configured transport
+    /// (UDP only, so far); `None` when no transport is set.
+    pub fn scrape(&self, info_hash: &[u8]) -> Option<Scrape> {
+        let udp = self.udp.as_ref()?;
+        let r = udp.scrape(info_hash).ok()?; // @TODO handle
+        let mut t = Scrape::default();
+        t.leechers += r.leechers;
+        t.peers += r.peers;
+        t.seeders += r.seeders;
+        Some(t)
+    }
+}
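One detail to double-check on the main.rs side: `scraper.scrape(torrent.info_hash.as_bytes())` forwards whatever bytes the stored hash holds, while a BEP 15 scrape packet expects the raw 20-byte digest. The `Torrent` type is not part of this patch; if `info_hash` turns out to be a 40-character hex string, a small decoder would be needed first. A sketch under that assumption (`decode_info_hash` is a hypothetical helper, not in the codebase):

    /// Decode a 40-char hex info-hash into the raw 20 bytes
    /// a BEP 15 scrape packet expects (assumes hex storage).
    fn decode_info_hash(hex: &str) -> Option<[u8; 20]> {
        if hex.len() != 40 || !hex.is_ascii() {
            return None;
        }
        let mut raw = [0u8; 20];
        for (i, byte) in raw.iter_mut().enumerate() {
            *byte = u8::from_str_radix(&hex[2 * i..2 * i + 2], 16).ok()?;
        }
        Some(raw)
    }
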
diff --git a/src/scraper/udp.rs b/src/scraper/udp.rs
new file mode 100644
index 0000000..c6031a8
--- /dev/null
+++ b/src/scraper/udp.rs
@@ -0,0 +1,104 @@
+use super::Scrape;
+use rand::Rng;
+use std::{
+    io::Error,
+    net::{SocketAddr, UdpSocket},
+    time::Duration,
+};
+
+struct Route {
+    socket: UdpSocket,
+    remote: Vec<SocketAddr>,
+}
+
+pub struct Udp(Vec<Route>);
+
+impl Udp {
+    pub fn init(local: Vec<SocketAddr>, remote: Vec<SocketAddr>) -> Self {
+        Self(
+            local
+                .into_iter()
+                .map(|l| {
+                    let socket = UdpSocket::bind(l).unwrap();
+                    socket
+                        .set_read_timeout(Some(Duration::from_secs(3)))
+                        .unwrap();
+                    Route {
+                        socket,
+                        remote: if l.is_ipv4() {
+                            remote.iter().filter(|r| r.is_ipv4()).cloned().collect()
+                        } else {
+                            remote.iter().filter(|r| r.is_ipv6()).cloned().collect()
+                        },
+                    }
+                })
+                .collect(),
+        )
+    }
+
+    pub fn scrape(&self, info_hash: &[u8]) -> Result<Scrape, Error> {
+        let mut t = Scrape::default();
+        for route in &self.0 {
+            for remote in &route.remote {
+                route.socket.send_to(&connection_request(), remote)?;
+
+                let mut b = [0u8; 16];
+                if route.socket.recv(&mut b)? < 16 {
+                    todo!() // @TODO short connect response
+                }
+                route.socket.send_to(
+                    &scrape_request(
+                        u64::from_be_bytes(b[8..16].try_into().unwrap()),
+                        rand::rng().random::<u32>(),
+                        &[info_hash.to_vec()],
+                    ),
+                    remote,
+                )?;
+
+                let mut b = [0u8; 1024];
+                let l = route.socket.recv(&mut b)?;
+                let r = scrape_response(&b[..l]);
+
+                t.leechers += r.leechers;
+                t.peers += r.peers;
+                t.seeders += r.seeders;
+            }
+        }
+        Ok(t)
+    }
+}
+
+fn connection_request() -> Vec<u8> {
+    let mut b = Vec::new();
+    b.extend_from_slice(&0x41727101980u64.to_be_bytes()); // protocol id (magic)
+    b.extend_from_slice(&0u32.to_be_bytes()); // action: connect
+    b.extend_from_slice(&rand::rng().random::<u32>().to_be_bytes()); // transaction id
+    b
+}
+
+fn scrape_request(connection_id: u64, transaction_id: u32, info_hashes: &[Vec<u8>]) -> Vec<u8> {
+    let mut b = Vec::new();
+    b.extend_from_slice(&connection_id.to_be_bytes());
+    b.extend_from_slice(&2u32.to_be_bytes()); // action: scrape
+    b.extend_from_slice(&transaction_id.to_be_bytes());
+    // * up to about 74 torrents can be scraped at once
+    // https://www.bittorrent.org/beps/bep_0015.html
+    if info_hashes.len() > 74 {
+        todo!()
+    }
+    for hash in info_hashes {
+        b.extend_from_slice(hash);
+    }
+    b
+}
+
+fn scrape_response(response: &[u8]) -> Scrape {
+    if response.len() < 20 {
+        todo!()
+    }
+    Scrape { // per BEP 15: seeders at 8..12, completed at 12..16, leechers at 16..20
+        leechers: u32::from_be_bytes(response[16..20].try_into().unwrap()),
+        seeders: u32::from_be_bytes(response[8..12].try_into().unwrap()),
+        peers: 0, // @TODO not in the response; completed (12..16) is unused
+    }
+}
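The `todo!()` branches leave the BEP 15 sanity checks open. A connect response is 16 bytes: the action (0 for connect), an echo of the transaction id, then the 64-bit connection id to reuse in the scrape request. A possible shape for that check, as a sketch only; the transaction id would also have to be threaded out of `connection_request`, which currently generates one internally:

    use std::io::{Error, ErrorKind};

    /// Sketch: validate a connect response and extract the connection id.
    /// Layout: [0..4] action = 0, [4..8] transaction id echo, [8..16] connection id.
    fn parse_connect_response(b: &[u8], transaction_id: u32) -> Result<u64, Error> {
        if b.len() < 16 {
            return Err(Error::new(ErrorKind::UnexpectedEof, "short connect response"));
        }
        if u32::from_be_bytes(b[0..4].try_into().unwrap()) != 0 {
            return Err(Error::new(ErrorKind::InvalidData, "unexpected action"));
        }
        if u32::from_be_bytes(b[4..8].try_into().unwrap()) != transaction_id {
            return Err(Error::new(ErrorKind::InvalidData, "transaction id mismatch"));
        }
        Ok(u64::from_be_bytes(b[8..16].try_into().unwrap()))
    }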
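`scrape_request` already accepts up to 74 hashes per packet, but `scrape_response` only reads the first entry. The response carries one 12-byte seeders/completed/leechers triple per requested hash, in request order, after the 8-byte action/transaction-id header; iterating all of them could look like this sketch:

    /// Sketch: all (seeders, completed, leechers) triples of a scrape
    /// response, in the same order as the hashes in the request.
    fn scrape_entries(response: &[u8]) -> Vec<(u32, u32, u32)> {
        response
            .get(8..)
            .unwrap_or_default()
            .chunks_exact(12)
            .map(|c| {
                (
                    u32::from_be_bytes(c[0..4].try_into().unwrap()),
                    u32::from_be_bytes(c[4..8].try_into().unwrap()),
                    u32::from_be_bytes(c[8..12].try_into().unwrap()),
                )
            })
            .collect()
    }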