Draft multi-stack UDP scraper implementation

This commit is contained in:
yggverse 2025-08-07 04:19:43 +03:00
parent 4f431083b6
commit 00e8f8078b
6 changed files with 180 additions and 13 deletions

View file

@ -19,3 +19,4 @@ url = { version = "2.5", features = ["serde"] }
urlencoding = "2.1"
rocket_dyn_templates = { version = "0.2", features = ["tera"] }
plurify = "0.2"
rand = "0.9"

View file

@ -18,7 +18,9 @@
* [ ] Files list
* [ ] Background image (from the files asset)
* [ ] Common features
* [ ] Scrape peers/seeds
* [ ] Scrape peers/seeders/leechers
* [x] UDP
* [ ] TCP
* [ ] Download
* [x] Magnet
* [ ] Torrent

View file

@ -1,6 +1,6 @@
use clap::Parser;
use std::{
net::{IpAddr, Ipv4Addr},
net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6},
path::PathBuf,
};
use url::Url;
@ -53,4 +53,21 @@ pub struct Config {
/// Bind server on given port
#[arg(long, short, default_value_t = 8000)]
pub port: u16,
/// Bind local UDP client for `scrape_udp_server`
///
/// * not in use if the `scrape_udp_server` is not set
#[arg(long, default_values_t = vec![
SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0)),
SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::UNSPECIFIED, 0, 0, 0))
])]
pub scrape_udp_client: Vec<SocketAddr>,
/// Scrape given UDP trackers to display peers/seeders/leechers info
#[arg(long)]
pub scrape_udp_server: Option<Vec<SocketAddr>>,
/// Configure instance in the debug mode
#[arg(long, default_value_t = false)]
pub debug: bool,
}

View file

@ -4,6 +4,7 @@ extern crate rocket;
mod config;
mod feed;
mod format;
mod scraper;
mod storage;
use config::Config;
@ -15,6 +16,7 @@ use rocket::{
serde::Serialize,
};
use rocket_dyn_templates::{Template, context};
use scraper::{Scrape, Scraper};
use storage::{Order, Sort, Storage, Torrent};
use url::Url;
@ -33,19 +35,13 @@ pub struct Meta {
#[get("/?<page>")]
fn index(
page: Option<usize>,
scraper: &State<Scraper>,
storage: &State<Storage>,
meta: &State<Meta>,
) -> Result<Template, Custom<String>> {
use plurify::Plurify;
#[derive(Serialize)]
#[serde(crate = "rocket::serde")]
struct Scrape {
leechers: usize,
peers: usize,
seeders: usize,
}
#[derive(Serialize)]
#[serde(crate = "rocket::serde")]
struct Row {
created: Option<String>,
files: String,
@ -77,7 +73,7 @@ fn index(
.map(|t| t.format(&meta.format_time).to_string()),
indexed: torrent.time.format(&meta.format_time).to_string(),
magnet: format::magnet(&torrent.info_hash, meta.trackers.as_ref()),
scrape: None, // @TODO
scrape: scraper.scrape(torrent.info_hash.as_bytes()),
size: format::bytes(torrent.size),
files: torrent.files.as_ref().map_or("1 file".into(), |f| {
let l = f.len();
@ -116,7 +112,6 @@ fn rss(feed: &State<Feed>, storage: &State<Storage>) -> Result<RawXml<String>, C
#[launch]
fn rocket() -> _ {
use clap::Parser;
use rocket::fs::FileServer;
let config = Config::parse();
let feed = Feed::init(
config.title.clone(),
@ -124,15 +119,25 @@ fn rocket() -> _ {
config.canonical_url.clone(),
config.tracker.clone(),
);
let scraper = Scraper::init(
config
.scrape_udp_server
.map(|s| (config.scrape_udp_client, s)),
);
let storage = Storage::init(config.preload, config.list_limit, config.capacity).unwrap(); // @TODO handle
rocket::build()
.attach(Template::fairing())
.configure(rocket::Config {
port: config.port,
address: config.host,
..rocket::Config::default()
..if config.debug {
rocket::Config::debug_default()
} else {
rocket::Config::default()
}
})
.manage(feed)
.manage(scraper)
.manage(storage)
.manage(Meta {
canonical: config.canonical_url,
@ -142,6 +147,6 @@ fn rocket() -> _ {
trackers: config.tracker,
version: env!("CARGO_PKG_VERSION").into(),
})
.mount("/", FileServer::from(config.statics))
.mount("/", rocket::fs::FileServer::from(config.statics))
.mount("/", routes![index, rss])
}

38
src/scraper.rs Normal file
View file

@ -0,0 +1,38 @@
mod udp;
use rocket::serde::Serialize;
use std::net::SocketAddr;
use udp::Udp;
/// Swarm statistics for a single torrent as reported by a tracker
/// scrape; serialized into templates via Rocket's serde re-export.
#[derive(Serialize, Default)]
#[serde(crate = "rocket::serde")]
pub struct Scrape {
    // Downloaders that do not yet have the complete payload.
    pub leechers: u32,
    // Total connected peers; left 0 by the UDP backend draft (@TODO).
    pub peers: u32,
    // Peers holding the complete payload.
    pub seeders: u32,
}
/// Aggregates scrape results across transports.
/// Currently only UDP is implemented; `None` means UDP scraping is
/// not configured and `scrape` will report nothing.
pub struct Scraper {
    udp: Option<Udp>,
    // tcp: @TODO
}
impl Scraper {
pub fn init(udp: Option<(Vec<SocketAddr>, Vec<SocketAddr>)>) -> Self {
Self {
udp: udp.map(|(local, remote)| Udp::init(local, remote)),
}
}
pub fn scrape(&self, info_hash: &[u8]) -> Option<Scrape> {
self.udp.as_ref()?;
let mut t = Scrape::default();
if let Some(ref u) = self.udp {
let r = u.scrape(info_hash).ok()?; // @TODO handle
t.leechers += r.leechers;
t.peers += r.peers;
t.seeders += r.seeders;
}
Some(t)
}
}

104
src/scraper/udp.rs Normal file
View file

@ -0,0 +1,104 @@
use super::Scrape;
use rand::Rng;
use std::{
    io::{Error, ErrorKind},
    net::{SocketAddr, UdpSocket},
    time::Duration,
};
/// One bound local UDP socket paired with the remote trackers it can
/// reach (restricted to the same IP family by `Udp::init`).
struct Route {
    socket: UdpSocket,
    remote: Vec<SocketAddr>,
}
/// UDP scrape transport: one route per configured local bind address,
/// used to query trackers with the BEP 15 connect/scrape handshake.
pub struct Udp(Vec<Route>);
impl Udp {
pub fn init(local: Vec<SocketAddr>, remote: Vec<SocketAddr>) -> Self {
Self(
local
.into_iter()
.map(|l| {
let socket = UdpSocket::bind(l).unwrap();
socket
.set_read_timeout(Some(Duration::from_secs(3)))
.unwrap();
Route {
socket,
remote: if l.is_ipv4() {
remote.iter().filter(|r| r.is_ipv4()).cloned().collect()
} else {
remote.iter().filter(|r| r.is_ipv6()).cloned().collect()
},
}
})
.collect(),
)
}
pub fn scrape(&self, info_hash: &[u8]) -> Result<Scrape, Error> {
let mut t = Scrape::default();
for route in &self.0 {
for remote in &route.remote {
route.socket.send_to(&connection_request(), remote)?;
let mut b = [0u8; 16];
if route.socket.recv(&mut b)? < 16 {
todo!()
}
route.socket.send_to(
&scrape_request(
u64::from_be_bytes(b[8..16].try_into().unwrap()),
rand::rng().random::<u32>(),
&[info_hash.to_vec()],
),
remote,
)?;
let mut b = [0u8; 1024];
let l = route.socket.recv(&mut b)?;
let r = scrape_response(&b[..l]);
t.leechers += r.leechers;
t.peers += r.peers;
t.seeders += r.seeders;
}
}
Ok(t)
}
}
/// Build a BEP 15 connect request packet.
///
/// Layout (big-endian): protocol magic `0x41727101980` (8 bytes) |
/// action = 0 "connect" (4 bytes) | random transaction id (4 bytes).
///
/// NOTE(review): the transaction id is generated here and discarded,
/// so callers cannot verify the tracker's echo — consider returning it
/// alongside the packet.
fn connection_request() -> Vec<u8> {
    // The packet is always exactly 16 bytes: reserve once.
    let mut b = Vec::with_capacity(16);
    b.extend_from_slice(&0x41727101980u64.to_be_bytes());
    b.extend_from_slice(&0u32.to_be_bytes()); // action 0 = connect
    b.extend_from_slice(&rand::rng().random::<u32>().to_be_bytes());
    b
}
/// Build a BEP 15 scrape request packet.
///
/// Layout (big-endian): connection id (8 bytes) | action = 2 "scrape"
/// (4 bytes) | transaction id (4 bytes) | one 20-byte info-hash per
/// torrent.
///
/// # Panics
///
/// Panics (`todo!`) when more than 74 hashes are supplied: BEP 15 caps
/// a single scrape packet at about 74 torrents.
fn scrape_request(connection_id: u64, transaction_id: u32, info_hashes: &[Vec<u8>]) -> Vec<u8> {
    // * up to about 74 torrents can be scraped at once
    // https://www.bittorrent.org/beps/bep_0015.html
    // Checked before allocating anything.
    if info_hashes.len() > 74 {
        todo!()
    }
    // 16-byte header plus 20 bytes per info-hash: reserve once.
    let mut b = Vec::with_capacity(16 + 20 * info_hashes.len());
    b.extend_from_slice(&connection_id.to_be_bytes());
    b.extend_from_slice(&2u32.to_be_bytes()); // action 2 = scrape
    b.extend_from_slice(&transaction_id.to_be_bytes());
    for hash in info_hashes {
        b.extend_from_slice(hash);
    }
    b
}
fn scrape_response(response: &[u8]) -> Scrape {
if response.len() < 20 {
todo!()
}
Scrape {
leechers: u32::from_be_bytes(response[12..16].try_into().unwrap()),
seeders: u32::from_be_bytes(response[16..20].try_into().unwrap()),
peers: 0, // @TODO
}
}