Update proxy_url API to the librqbit 0.9 beta API; add enable_tcp option; implement peer_opts, add_torrent_timeout, and download_torrent_timeout arguments

This commit is contained in:
yggverse 2025-06-15 16:45:11 +03:00
parent 3a948b5bee
commit b38988dacf
3 changed files with 71 additions and 18 deletions

View file

@ -11,14 +11,15 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke
## Roadmap
* Info-hash versions supported
* [x] 1
* [ ] 2
* Targets supported
* [x] IPv4/IPv6 info-hash JSON/API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233))
* [x] local file path
* [ ] remote URL
* Storage
* [x] File system (resolve infohash to the `.torrent` + download content files matching the regex pattern)
* [x] V1
* [ ] V2
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
* [ ] SQLite
@ -36,7 +37,8 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
--infohash-file /path/to/another-source.json\
--torrent-tracker udp://host1:port\
--torrent-tracker udp://host2:port\
--storage /path/to/storage
--storage /path/to/storage\
--enable-tcp
```
### Options
@ -49,7 +51,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
[default: ei]
-c, --clear
--clear
Clear previous index collected on crawl session start
--infohash-file <INFOHASH_FILE>
@ -69,6 +71,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
--enable-dht
Enable DHT resolver
--enable-tcp
Enable TCP connection
--enable-upnp-port-forwarding
Enable UPnP
@ -87,20 +92,34 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
--save-torrents
Save resolved torrent files to the `storage` location
--socks-proxy-url <SOCKS_PROXY_URL>
--proxy-url <PROXY_URL>
Use `socks5://[username:password@]host:port`
--peer-connect-timeout <PEER_CONNECT_TIMEOUT>
--peer-read-write-timeout <PEER_READ_WRITE_TIMEOUT>
--peer-keep-alive-interval <PEER_KEEP_ALIVE_INTERVAL>
--index-capacity <INDEX_CAPACITY>
Estimated info-hash index capacity
[default: 1000]
-t <TIMEOUT>
Max time to handle one torrent
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
Max time to handle each torrent
[default: 10]
-s <SLEEP>
--download-torrent-timeout <DOWNLOAD_TORRENT_TIMEOUT>
Max time to download each torrent
[default: 10]
--sleep <SLEEP>
Crawl loop delay in seconds
[default: 300]

View file

@ -12,7 +12,7 @@ pub struct Argument {
pub debug: String,
/// Clear previous index collected on crawl session start
#[arg(short, long, default_value_t = false)]
#[arg(long, default_value_t = false)]
pub clear: bool,
/// Absolute filename(s) to the Aquatic tracker info-hash JSON/API
@ -37,6 +37,10 @@ pub struct Argument {
#[arg(long, default_value_t = false)]
pub enable_dht: bool,
/// Enable TCP connection
#[arg(long, default_value_t = false)]
pub enable_tcp: bool,
/// Enable UPnP
#[arg(long, default_value_t = false)]
pub enable_upnp_port_forwarding: bool,
@ -64,18 +68,32 @@ pub struct Argument {
/// Use `socks5://[username:password@]host:port`
#[arg(long)]
pub socks_proxy_url: Option<String>,
pub proxy_url: Option<String>,
// Peer options
#[arg(long)]
pub peer_connect_timeout: Option<u64>,
#[arg(long)]
pub peer_read_write_timeout: Option<u64>,
#[arg(long)]
pub peer_keep_alive_interval: Option<u64>,
/// Estimated info-hash index capacity
#[arg(long, default_value_t = 1000)]
pub index_capacity: usize,
/// Max time to handle one torrent
#[arg(short, default_value_t = 10)]
pub timeout: u64,
/// Max time to handle each torrent
#[arg(long, default_value_t = 10)]
pub add_torrent_timeout: u64,
/// Max time to download each torrent
#[arg(long, default_value_t = 10)]
pub download_torrent_timeout: u64,
/// Crawl loop delay in seconds
#[arg(short, default_value_t = 300)]
#[arg(long, default_value_t = 300)]
pub sleep: u64,
/// Limit upload speed (b/s)

View file

@ -12,7 +12,10 @@ use storage::Storage;
#[tokio::main]
async fn main() -> Result<()> {
use clap::Parser;
use librqbit::{AddTorrent, AddTorrentOptions, AddTorrentResponse, SessionOptions};
use librqbit::{
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions,
PeerConnectionOptions, SessionOptions,
};
use std::{collections::HashSet, num::NonZero, time::Duration};
use tokio::time;
@ -21,11 +24,19 @@ async fn main() -> Result<()> {
let debug = Debug::init(&arg.debug)?;
let peers = peers::Peers::init(&arg.initial_peer)?;
let storage = Storage::init(&arg.storage, arg.clear)?;
let timeout = Duration::from_secs(arg.timeout);
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
let session = librqbit::Session::new_with_opts(
storage.path(),
SessionOptions {
connect: Some(ConnectionOptions {
enable_tcp: arg.enable_tcp,
proxy_url: arg.proxy_url,
peer_opts: Some(PeerConnectionOptions {
connect_timeout: arg.peer_connect_timeout.map(Duration::from_secs),
read_write_timeout: arg.peer_read_write_timeout.map(Duration::from_secs),
keep_alive_interval: arg.peer_keep_alive_interval.map(Duration::from_secs),
}),
}),
disable_upload: !arg.enable_upload,
disable_dht: !arg.enable_dht,
disable_dht_persistence: true,
@ -62,7 +73,7 @@ async fn main() -> Result<()> {
// run the crawler in single thread for performance reasons,
// use `timeout` argument option to skip the dead connections.
match time::timeout(
timeout,
Duration::from_secs(arg.add_torrent_timeout),
session.add_torrent(
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
Some(AddTorrentOptions {
@ -96,7 +107,12 @@ async fn main() -> Result<()> {
})?;
}
// await for `preload_regex` files download to continue
match time::timeout(timeout, mt.wait_until_completed()).await {
match time::timeout(
Duration::from_secs(arg.download_torrent_timeout),
mt.wait_until_completed(),
)
.await
{
Ok(r) => {
if let Err(e) = r {
debug.info(&format!("Skip `{i}`: `{e}`."))