mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
update proxy_url api to librqbit 0.9 beta api, add enable_tcp option, implement peer_opts, add_torrent_timeout, download_torrent_timeout arguments
This commit is contained in:
parent
3a948b5bee
commit
b38988dacf
3 changed files with 71 additions and 18 deletions
35
README.md
35
README.md
|
|
@ -11,14 +11,15 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke
|
|||
|
||||
## Roadmap
|
||||
|
||||
* Info-hash versions supported
|
||||
* [x] 1
|
||||
* [ ] 2
|
||||
* Targets supported
|
||||
* [x] IPv4/IPv6 info-hash JSON/API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233))
|
||||
* [x] local file path
|
||||
* [ ] remote URL
|
||||
* Storage
|
||||
* [x] File system (resolve each info-hash to its `.torrent` file and download the content files matching the regex pattern)
|
||||
* [x] V1
|
||||
* [ ] V2
|
||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
||||
* [ ] SQLite
|
||||
|
||||
|
|
@ -36,7 +37,8 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
|||
--infohash-file /path/to/another-source.json\
|
||||
--torrent-tracker udp://host1:port\
|
||||
--torrent-tracker udp://host2:port\
|
||||
--storage /path/to/storage
|
||||
--storage /path/to/storage\
|
||||
--enable-tcp
|
||||
```
|
||||
|
||||
### Options
|
||||
|
|
@ -49,7 +51,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
|||
|
||||
[default: ei]
|
||||
|
||||
-c, --clear
|
||||
--clear
|
||||
Clear previous index collected on crawl session start
|
||||
|
||||
--infohash-file <INFOHASH_FILE>
|
||||
|
|
@ -69,6 +71,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
|||
--enable-dht
|
||||
Enable DHT resolver
|
||||
|
||||
--enable-tcp
|
||||
Enable TCP connection
|
||||
|
||||
--enable-upnp-port-forwarding
|
||||
Enable UPnP
|
||||
|
||||
|
|
@ -87,20 +92,34 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
|||
--save-torrents
|
||||
Save resolved torrent files to the `storage` location
|
||||
|
||||
--socks-proxy-url <SOCKS_PROXY_URL>
|
||||
--proxy-url <PROXY_URL>
|
||||
Use `socks5://[username:password@]host:port`
|
||||
|
||||
--peer-connect-timeout <PEER_CONNECT_TIMEOUT>
|
||||
|
||||
|
||||
--peer-read-write-timeout <PEER_READ_WRITE_TIMEOUT>
|
||||
|
||||
|
||||
--peer-keep-alive-interval <PEER_KEEP_ALIVE_INTERVAL>
|
||||
|
||||
|
||||
--index-capacity <INDEX_CAPACITY>
|
||||
Estimated info-hash index capacity
|
||||
|
||||
[default: 1000]
|
||||
|
||||
-t <TIMEOUT>
|
||||
Max time to handle one torrent
|
||||
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
||||
Max time in seconds to add (resolve) each torrent
|
||||
|
||||
[default: 10]
|
||||
|
||||
-s <SLEEP>
|
||||
--download-torrent-timeout <DOWNLOAD_TORRENT_TIMEOUT>
|
||||
Max time in seconds to download each torrent
|
||||
|
||||
[default: 10]
|
||||
|
||||
--sleep <SLEEP>
|
||||
Crawl loop delay in seconds
|
||||
|
||||
[default: 300]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ pub struct Argument {
|
|||
pub debug: String,
|
||||
|
||||
/// Clear previous index collected on crawl session start
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub clear: bool,
|
||||
|
||||
/// Absolute filename(s) to the Aquatic tracker info-hash JSON/API
|
||||
|
|
@ -37,6 +37,10 @@ pub struct Argument {
|
|||
#[arg(long, default_value_t = false)]
|
||||
pub enable_dht: bool,
|
||||
|
||||
/// Enable TCP connection
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub enable_tcp: bool,
|
||||
|
||||
/// Enable UPnP
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub enable_upnp_port_forwarding: bool,
|
||||
|
|
@ -64,18 +68,32 @@ pub struct Argument {
|
|||
|
||||
/// Use `socks5://[username:password@]host:port`
|
||||
#[arg(long)]
|
||||
pub socks_proxy_url: Option<String>,
|
||||
pub proxy_url: Option<String>,
|
||||
|
||||
// Peer options
|
||||
#[arg(long)]
|
||||
pub peer_connect_timeout: Option<u64>,
|
||||
|
||||
#[arg(long)]
|
||||
pub peer_read_write_timeout: Option<u64>,
|
||||
|
||||
#[arg(long)]
|
||||
pub peer_keep_alive_interval: Option<u64>,
|
||||
|
||||
/// Estimated info-hash index capacity
|
||||
#[arg(long, default_value_t = 1000)]
|
||||
pub index_capacity: usize,
|
||||
|
||||
/// Max time to handle one torrent
|
||||
#[arg(short, default_value_t = 10)]
|
||||
pub timeout: u64,
|
||||
/// Max time to handle each torrent
|
||||
#[arg(long, default_value_t = 10)]
|
||||
pub add_torrent_timeout: u64,
|
||||
|
||||
/// Max time to download each torrent
|
||||
#[arg(long, default_value_t = 10)]
|
||||
pub download_torrent_timeout: u64,
|
||||
|
||||
/// Crawl loop delay in seconds
|
||||
#[arg(short, default_value_t = 300)]
|
||||
#[arg(long, default_value_t = 300)]
|
||||
pub sleep: u64,
|
||||
|
||||
/// Limit upload speed (b/s)
|
||||
|
|
|
|||
24
src/main.rs
24
src/main.rs
|
|
@ -12,7 +12,10 @@ use storage::Storage;
|
|||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
use clap::Parser;
|
||||
use librqbit::{AddTorrent, AddTorrentOptions, AddTorrentResponse, SessionOptions};
|
||||
use librqbit::{
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions,
|
||||
PeerConnectionOptions, SessionOptions,
|
||||
};
|
||||
use std::{collections::HashSet, num::NonZero, time::Duration};
|
||||
use tokio::time;
|
||||
|
||||
|
|
@ -21,11 +24,19 @@ async fn main() -> Result<()> {
|
|||
let debug = Debug::init(&arg.debug)?;
|
||||
let peers = peers::Peers::init(&arg.initial_peer)?;
|
||||
let storage = Storage::init(&arg.storage, arg.clear)?;
|
||||
let timeout = Duration::from_secs(arg.timeout);
|
||||
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
|
||||
let session = librqbit::Session::new_with_opts(
|
||||
storage.path(),
|
||||
SessionOptions {
|
||||
connect: Some(ConnectionOptions {
|
||||
enable_tcp: arg.enable_tcp,
|
||||
proxy_url: arg.proxy_url,
|
||||
peer_opts: Some(PeerConnectionOptions {
|
||||
connect_timeout: arg.peer_connect_timeout.map(Duration::from_secs),
|
||||
read_write_timeout: arg.peer_read_write_timeout.map(Duration::from_secs),
|
||||
keep_alive_interval: arg.peer_keep_alive_interval.map(Duration::from_secs),
|
||||
}),
|
||||
}),
|
||||
disable_upload: !arg.enable_upload,
|
||||
disable_dht: !arg.enable_dht,
|
||||
disable_dht_persistence: true,
|
||||
|
|
@ -62,7 +73,7 @@ async fn main() -> Result<()> {
|
|||
// run the crawler in single thread for performance reasons,
|
||||
// use the `add_torrent_timeout` argument option to skip dead connections.
|
||||
match time::timeout(
|
||||
timeout,
|
||||
Duration::from_secs(arg.add_torrent_timeout),
|
||||
session.add_torrent(
|
||||
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
|
||||
Some(AddTorrentOptions {
|
||||
|
|
@ -96,7 +107,12 @@ async fn main() -> Result<()> {
|
|||
})?;
|
||||
}
|
||||
// wait for the `preload_regex` files to finish downloading before continuing
|
||||
match time::timeout(timeout, mt.wait_until_completed()).await {
|
||||
match time::timeout(
|
||||
Duration::from_secs(arg.download_torrent_timeout),
|
||||
mt.wait_until_completed(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
if let Err(e) = r {
|
||||
debug.info(&format!("Skip `{i}`: `{e}`."))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue