mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
update proxy_url api to librqbit 0.9 beta api, add enable_tcp option, implement peer_opts, add_torrent_timeout, download_torrent_timeout arguments
This commit is contained in:
parent
3a948b5bee
commit
b38988dacf
3 changed files with 71 additions and 18 deletions
35
README.md
35
README.md
|
|
@ -11,14 +11,15 @@ Crawler for [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracke
|
||||||
|
|
||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
|
* Info-hash versions supported
|
||||||
|
* [x] 1
|
||||||
|
* [ ] 2
|
||||||
* Targets supported
|
* Targets supported
|
||||||
* [x] IPv4/IPv6 info-hash JSON/API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233))
|
* [x] IPv4/IPv6 info-hash JSON/API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233))
|
||||||
* [x] local file path
|
* [x] local file path
|
||||||
* [ ] remote URL
|
* [ ] remote URL
|
||||||
* Storage
|
* Storage
|
||||||
* [x] File system (resolve infohash to the `.torrent` + download content files match the regex pattern)
|
* [x] File system (resolve infohash to the `.torrent` + download content files match the regex pattern)
|
||||||
* [x] V1
|
|
||||||
* [ ] V2
|
|
||||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
||||||
* [ ] SQLite
|
* [ ] SQLite
|
||||||
|
|
||||||
|
|
@ -36,7 +37,8 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
--infohash-file /path/to/another-source.json\
|
--infohash-file /path/to/another-source.json\
|
||||||
--torrent-tracker udp://host1:port\
|
--torrent-tracker udp://host1:port\
|
||||||
--torrent-tracker udp://host2:port\
|
--torrent-tracker udp://host2:port\
|
||||||
--storage /path/to/storage
|
--storage /path/to/storage\
|
||||||
|
--enable-tcp
|
||||||
```
|
```
|
||||||
|
|
||||||
### Options
|
### Options
|
||||||
|
|
@ -49,7 +51,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
|
|
||||||
[default: ei]
|
[default: ei]
|
||||||
|
|
||||||
-c, --clear
|
--clear
|
||||||
Clear previous index collected on crawl session start
|
Clear previous index collected on crawl session start
|
||||||
|
|
||||||
--infohash-file <INFOHASH_FILE>
|
--infohash-file <INFOHASH_FILE>
|
||||||
|
|
@ -69,6 +71,9 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
--enable-dht
|
--enable-dht
|
||||||
Enable DHT resolver
|
Enable DHT resolver
|
||||||
|
|
||||||
|
--enable-tcp
|
||||||
|
Enable TCP connection
|
||||||
|
|
||||||
--enable-upnp-port-forwarding
|
--enable-upnp-port-forwarding
|
||||||
Enable UPnP
|
Enable UPnP
|
||||||
|
|
||||||
|
|
@ -87,20 +92,34 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
--save-torrents
|
--save-torrents
|
||||||
Save resolved torrent files to the `storage` location
|
Save resolved torrent files to the `storage` location
|
||||||
|
|
||||||
--socks-proxy-url <SOCKS_PROXY_URL>
|
--proxy-url <PROXY_URL>
|
||||||
Use `socks5://[username:password@]host:port`
|
Use `socks5://[username:password@]host:port`
|
||||||
|
|
||||||
|
--peer-connect-timeout <PEER_CONNECT_TIMEOUT>
|
||||||
|
|
||||||
|
|
||||||
|
--peer-read-write-timeout <PEER_READ_WRITE_TIMEOUT>
|
||||||
|
|
||||||
|
|
||||||
|
--peer-keep-alive-interval <PEER_KEEP_ALIVE_INTERVAL>
|
||||||
|
|
||||||
|
|
||||||
--index-capacity <INDEX_CAPACITY>
|
--index-capacity <INDEX_CAPACITY>
|
||||||
Estimated info-hash index capacity
|
Estimated info-hash index capacity
|
||||||
|
|
||||||
[default: 1000]
|
[default: 1000]
|
||||||
|
|
||||||
-t <TIMEOUT>
|
--add-torrent-timeout <ADD_TORRENT_TIMEOUT>
|
||||||
Max time to handle one torrent
|
Max time to add each torrent
|
||||||
|
|
||||||
[default: 10]
|
[default: 10]
|
||||||
|
|
||||||
-s <SLEEP>
|
--download-torrent-timeout <DOWNLOAD_TORRENT_TIMEOUT>
|
||||||
|
Max time to download each torrent
|
||||||
|
|
||||||
|
[default: 10]
|
||||||
|
|
||||||
|
--sleep <SLEEP>
|
||||||
Crawl loop delay in seconds
|
Crawl loop delay in seconds
|
||||||
|
|
||||||
[default: 300]
|
[default: 300]
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ pub struct Argument {
|
||||||
pub debug: String,
|
pub debug: String,
|
||||||
|
|
||||||
/// Clear previous index collected on crawl session start
|
/// Clear previous index collected on crawl session start
|
||||||
#[arg(short, long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub clear: bool,
|
pub clear: bool,
|
||||||
|
|
||||||
/// Absolute filename(s) to the Aquatic tracker info-hash JSON/API
|
/// Absolute filename(s) to the Aquatic tracker info-hash JSON/API
|
||||||
|
|
@ -37,6 +37,10 @@ pub struct Argument {
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub enable_dht: bool,
|
pub enable_dht: bool,
|
||||||
|
|
||||||
|
/// Enable TCP connection
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
pub enable_tcp: bool,
|
||||||
|
|
||||||
/// Enable UPnP
|
/// Enable UPnP
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub enable_upnp_port_forwarding: bool,
|
pub enable_upnp_port_forwarding: bool,
|
||||||
|
|
@ -64,18 +68,32 @@ pub struct Argument {
|
||||||
|
|
||||||
/// Use `socks5://[username:password@]host:port`
|
/// Use `socks5://[username:password@]host:port`
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub socks_proxy_url: Option<String>,
|
pub proxy_url: Option<String>,
|
||||||
|
|
||||||
|
// Peer options
|
||||||
|
#[arg(long)]
|
||||||
|
pub peer_connect_timeout: Option<u64>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
pub peer_read_write_timeout: Option<u64>,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
pub peer_keep_alive_interval: Option<u64>,
|
||||||
|
|
||||||
/// Estimated info-hash index capacity
|
/// Estimated info-hash index capacity
|
||||||
#[arg(long, default_value_t = 1000)]
|
#[arg(long, default_value_t = 1000)]
|
||||||
pub index_capacity: usize,
|
pub index_capacity: usize,
|
||||||
|
|
||||||
/// Max time to handle one torrent
|
/// Max time to add each torrent
|
||||||
#[arg(short, default_value_t = 10)]
|
#[arg(long, default_value_t = 10)]
|
||||||
pub timeout: u64,
|
pub add_torrent_timeout: u64,
|
||||||
|
|
||||||
|
/// Max time to download each torrent
|
||||||
|
#[arg(long, default_value_t = 10)]
|
||||||
|
pub download_torrent_timeout: u64,
|
||||||
|
|
||||||
/// Crawl loop delay in seconds
|
/// Crawl loop delay in seconds
|
||||||
#[arg(short, default_value_t = 300)]
|
#[arg(long, default_value_t = 300)]
|
||||||
pub sleep: u64,
|
pub sleep: u64,
|
||||||
|
|
||||||
/// Limit upload speed (b/s)
|
/// Limit upload speed (b/s)
|
||||||
|
|
|
||||||
24
src/main.rs
24
src/main.rs
|
|
@ -12,7 +12,10 @@ use storage::Storage;
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use librqbit::{AddTorrent, AddTorrentOptions, AddTorrentResponse, SessionOptions};
|
use librqbit::{
|
||||||
|
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions,
|
||||||
|
PeerConnectionOptions, SessionOptions,
|
||||||
|
};
|
||||||
use std::{collections::HashSet, num::NonZero, time::Duration};
|
use std::{collections::HashSet, num::NonZero, time::Duration};
|
||||||
use tokio::time;
|
use tokio::time;
|
||||||
|
|
||||||
|
|
@ -21,11 +24,19 @@ async fn main() -> Result<()> {
|
||||||
let debug = Debug::init(&arg.debug)?;
|
let debug = Debug::init(&arg.debug)?;
|
||||||
let peers = peers::Peers::init(&arg.initial_peer)?;
|
let peers = peers::Peers::init(&arg.initial_peer)?;
|
||||||
let storage = Storage::init(&arg.storage, arg.clear)?;
|
let storage = Storage::init(&arg.storage, arg.clear)?;
|
||||||
let timeout = Duration::from_secs(arg.timeout);
|
|
||||||
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
|
let trackers = trackers::Trackers::init(&arg.torrent_tracker)?;
|
||||||
let session = librqbit::Session::new_with_opts(
|
let session = librqbit::Session::new_with_opts(
|
||||||
storage.path(),
|
storage.path(),
|
||||||
SessionOptions {
|
SessionOptions {
|
||||||
|
connect: Some(ConnectionOptions {
|
||||||
|
enable_tcp: arg.enable_tcp,
|
||||||
|
proxy_url: arg.proxy_url,
|
||||||
|
peer_opts: Some(PeerConnectionOptions {
|
||||||
|
connect_timeout: arg.peer_connect_timeout.map(Duration::from_secs),
|
||||||
|
read_write_timeout: arg.peer_read_write_timeout.map(Duration::from_secs),
|
||||||
|
keep_alive_interval: arg.peer_keep_alive_interval.map(Duration::from_secs),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
disable_upload: !arg.enable_upload,
|
disable_upload: !arg.enable_upload,
|
||||||
disable_dht: !arg.enable_dht,
|
disable_dht: !arg.enable_dht,
|
||||||
disable_dht_persistence: true,
|
disable_dht_persistence: true,
|
||||||
|
|
@ -62,7 +73,7 @@ async fn main() -> Result<()> {
|
||||||
// run the crawler in single thread for performance reasons,
|
// run the crawler in single thread for performance reasons,
|
||||||
// use `timeout` argument option to skip the dead connections.
|
// use the `add_torrent_timeout` argument option to skip the dead connections.
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
timeout,
|
Duration::from_secs(arg.add_torrent_timeout),
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
|
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
|
||||||
Some(AddTorrentOptions {
|
Some(AddTorrentOptions {
|
||||||
|
|
@ -96,7 +107,12 @@ async fn main() -> Result<()> {
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
// await for `preload_regex` files download to continue
|
// wait for the `preload_regex` files to finish downloading before continuing
|
||||||
match time::timeout(timeout, mt.wait_until_completed()).await {
|
match time::timeout(
|
||||||
|
Duration::from_secs(arg.download_torrent_timeout),
|
||||||
|
mt.wait_until_completed(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
Ok(r) => {
|
Ok(r) => {
|
||||||
if let Err(e) = r {
|
if let Err(e) = r {
|
||||||
debug.info(&format!("Skip `{i}`: `{e}`."))
|
debug.info(&format!("Skip `{i}`: `{e}`."))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue