From b38988dacf579a30a692f079052f43b74189d3d9 Mon Sep 17 00:00:00 2001 From: yggverse Date: Sun, 15 Jun 2025 16:45:11 +0300 Subject: [PATCH] update `proxy_url` api to librqbit 0.9 beta api, add `enable_tcp` option, implement `peer_opts`, `add_torrent_timeout`, `download_torrent_timeout` arguments --- README.md | 35 +++++++++++++++++++++++++++-------- src/argument.rs | 30 ++++++++++++++++++++++++------ src/main.rs | 24 ++++++++++++++++++++---- 3 files changed, 71 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index b8eb520..dd47b0a 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,15 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke ## Roadmap +* Info-hash versions supported + * [x] 1 + * [ ] 2 * Targets supported * [x] IPv4/IPv6 info-hash JSON/API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233)) * [x] local file path * [ ] remote URL * Storage * [x] File system (resolve infohash to the `.torrent` + download content files match the regex pattern) - * [x] V1 - * [ ] V2 * [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search * [ ] SQLite @@ -36,7 +37,8 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ --infohash-file /path/to/another-source.json\ --torrent-tracker udp://host1:port\ --torrent-tracker udp://host2:port\ - --storage /path/to/storage + --storage /path/to/storage\ + --enable-tcp ``` ### Options @@ -49,7 +51,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ [default: ei] --c, --clear +--clear Clear previous index collected on crawl session start --infohash-file @@ -69,6 +71,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ --enable-dht Enable DHT resolver +--enable-tcp + Enable TCP connection + --enable-upnp-port-forwarding Enable UPnP @@ -87,20 +92,34 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ --save-torrents Save resolved torrent files to the `storage` location 
---socks-proxy-url +--proxy-url Use `socks5://[username:password@]host:port` +--peer-connect-timeout + Peer connection timeout in seconds + +--peer-read-write-timeout + Peer read/write timeout in seconds + +--peer-keep-alive-interval + Peer keep-alive interval in seconds + --index-capacity Estimated info-hash index capacity [default: 1000] --t - Max time to handle one torrent +--add-torrent-timeout + Max time to handle each torrent [default: 10] --s +--download-torrent-timeout + Max time to download each torrent + + [default: 10] + +--sleep Crawl loop delay in seconds [default: 300] diff --git a/src/argument.rs b/src/argument.rs index c6f42b3..a6a344d 100644 --- a/src/argument.rs +++ b/src/argument.rs @@ -12,7 +12,7 @@ pub struct Argument { pub debug: String, /// Clear previous index collected on crawl session start - #[arg(short, long, default_value_t = false)] + #[arg(long, default_value_t = false)] pub clear: bool, /// Absolute filename(s) to the Aquatic tracker info-hash JSON/API @@ -37,6 +37,10 @@ pub struct Argument { #[arg(long, default_value_t = false)] pub enable_dht: bool, + /// Enable TCP connection + #[arg(long, default_value_t = false)] + pub enable_tcp: bool, + /// Enable UPnP #[arg(long, default_value_t = false)] pub enable_upnp_port_forwarding: bool, @@ -64,18 +68,32 @@ pub struct Argument { /// Use `socks5://[username:password@]host:port` #[arg(long)] - pub socks_proxy_url: Option<String>, + pub proxy_url: Option<String>, + + // Peer options + #[arg(long)] + pub peer_connect_timeout: Option<u64>, + + #[arg(long)] + pub peer_read_write_timeout: Option<u64>, + + #[arg(long)] + pub peer_keep_alive_interval: Option<u64>, /// Estimated info-hash index capacity #[arg(long, default_value_t = 1000)] pub index_capacity: usize, - /// Max time to handle one torrent - #[arg(short, default_value_t = 10)] - pub timeout: u64, + /// Max time to handle each torrent + #[arg(long, default_value_t = 10)] + pub add_torrent_timeout: u64, + + /// Max time to download each torrent + #[arg(long, default_value_t = 10)] + pub download_torrent_timeout: u64, /// Crawl loop delay in seconds - #[arg(short, 
default_value_t = 300)] + #[arg(long, default_value_t = 300)] pub sleep: u64, /// Limit upload speed (b/s) diff --git a/src/main.rs b/src/main.rs index 0e26d25..185e31b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,10 @@ use storage::Storage; #[tokio::main] async fn main() -> Result<()> { use clap::Parser; - use librqbit::{AddTorrent, AddTorrentOptions, AddTorrentResponse, SessionOptions}; + use librqbit::{ + AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, + PeerConnectionOptions, SessionOptions, + }; use std::{collections::HashSet, num::NonZero, time::Duration}; use tokio::time; @@ -21,11 +24,19 @@ async fn main() -> Result<()> { let debug = Debug::init(&arg.debug)?; let peers = peers::Peers::init(&arg.initial_peer)?; let storage = Storage::init(&arg.storage, arg.clear)?; - let timeout = Duration::from_secs(arg.timeout); let trackers = trackers::Trackers::init(&arg.torrent_tracker)?; let session = librqbit::Session::new_with_opts( storage.path(), SessionOptions { + connect: Some(ConnectionOptions { + enable_tcp: arg.enable_tcp, + proxy_url: arg.proxy_url, + peer_opts: Some(PeerConnectionOptions { + connect_timeout: arg.peer_connect_timeout.map(Duration::from_secs), + read_write_timeout: arg.peer_read_write_timeout.map(Duration::from_secs), + keep_alive_interval: arg.peer_keep_alive_interval.map(Duration::from_secs), + }), + }), disable_upload: !arg.enable_upload, disable_dht: !arg.enable_dht, disable_dht_persistence: true, @@ -62,7 +73,7 @@ async fn main() -> Result<()> { // run the crawler in single thread for performance reasons, // use `timeout` argument option to skip the dead connections. 
match time::timeout( - timeout, + Duration::from_secs(arg.add_torrent_timeout), session.add_torrent( AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")), Some(AddTorrentOptions { @@ -96,7 +107,12 @@ async fn main() -> Result<()> { })?; } // await for `preload_regex` files download to continue - match time::timeout(timeout, mt.wait_until_completed()).await { + match time::timeout( + Duration::from_secs(arg.download_torrent_timeout), + mt.wait_until_completed(), + ) + .await + { Ok(r) => { if let Err(e) = r { debug.info(&format!("Skip `{i}`: `{e}`."))