mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
implement dynamic ban time calculation, remove extra argument options
This commit is contained in:
parent
b45d1de0b9
commit
9cd28eaa3b
2 changed files with 23 additions and 58 deletions
|
|
@ -74,52 +74,28 @@ pub struct Config {
|
|||
#[arg(long)]
|
||||
pub preload_max_filecount: Option<usize>,
|
||||
|
||||
/// Limit download speed (b/s)
|
||||
#[arg(long)]
|
||||
pub download_limit: Option<u32>, // * reminder: upload feature is not planned by the crawler impl
|
||||
|
||||
/// Use `socks5://[username:password@]host:port`
|
||||
#[arg(long)]
|
||||
pub proxy_url: Option<Url>,
|
||||
|
||||
// Tune up the peers processor
|
||||
#[arg(long)]
|
||||
pub peer_connect_timeout: Option<u64>,
|
||||
|
||||
#[arg(long)]
|
||||
pub peer_read_write_timeout: Option<u64>,
|
||||
|
||||
#[arg(long)]
|
||||
pub peer_keep_alive_interval: Option<u64>,
|
||||
|
||||
/// Estimated info-hash index capacity
|
||||
///
|
||||
/// * use for memory optimization, depending on tracker volumes
|
||||
#[arg(long, default_value_t = 1000)]
|
||||
pub index_capacity: usize,
|
||||
|
||||
/// Max time in seconds to add new torrent
|
||||
#[arg(long, default_value_t = 60)]
|
||||
pub add_torrent_timeout: u64,
|
||||
|
||||
/// Ban time in seconds on torrent add failure (`add_torrent_timeout` is reached)
|
||||
#[arg(long, default_value_t = 3600)]
|
||||
pub add_torrent_ban: u64,
|
||||
|
||||
/// Ban time in seconds on torrent resolve failure
|
||||
#[arg(long, default_value_t = 3600)]
|
||||
pub resolve_torrent_ban: u64,
|
||||
|
||||
/// Crawl loop delay in seconds
|
||||
#[arg(long, default_value_t = 60)]
|
||||
pub sleep: u64,
|
||||
|
||||
/// Limit download speed (b/s)
|
||||
#[arg(long)]
|
||||
pub download_limit: Option<u32>, // * reminder: upload feature is not planned by the crawler impl
|
||||
|
||||
/// Skip long-thinking connections,
|
||||
/// try to handle the other hashes in this queue after `n` seconds
|
||||
/// Skip and ban slow or unresolvable hashes
|
||||
/// when the specified value in seconds is reached
|
||||
///
|
||||
/// * the ban time is dynamically calculated based on the current ban list collected
|
||||
#[arg(long, default_value_t = 60)]
|
||||
pub wait_until_completed: u64,
|
||||
|
||||
/// Ban time in seconds when the torrent data download takes longer than `wait_until_completed`
|
||||
#[arg(long)]
|
||||
pub wait_until_completed_ban: Option<u64>,
|
||||
pub timeout: u64,
|
||||
}
|
||||
|
|
|
|||
39
src/main.rs
39
src/main.rs
|
|
@ -6,8 +6,7 @@ use anyhow::Result;
|
|||
use chrono::DateTime;
|
||||
use config::Config;
|
||||
use librqbit::{
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, PeerConnectionOptions,
|
||||
SessionOptions,
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, SessionOptions,
|
||||
};
|
||||
use preload::Preload;
|
||||
use std::{
|
||||
|
|
@ -44,11 +43,7 @@ async fn main() -> Result<()> {
|
|||
connect: Some(ConnectionOptions {
|
||||
enable_tcp: !config.disable_tcp,
|
||||
proxy_url: config.proxy_url.map(|u| u.to_string()),
|
||||
peer_opts: Some(PeerConnectionOptions {
|
||||
connect_timeout: config.peer_connect_timeout.map(Duration::from_secs),
|
||||
read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs),
|
||||
keep_alive_interval: config.peer_keep_alive_interval.map(Duration::from_secs),
|
||||
}),
|
||||
..ConnectionOptions::default()
|
||||
}),
|
||||
disable_dht_persistence: true,
|
||||
disable_dht: !config.enable_dht,
|
||||
|
|
@ -112,7 +107,7 @@ async fn main() -> Result<()> {
|
|||
// run the crawler in a single thread for performance reasons,
|
||||
// use `timeout` argument option to skip the dead connections.
|
||||
match time::timeout(
|
||||
Duration::from_secs(config.add_torrent_timeout),
|
||||
Duration::from_secs(config.timeout),
|
||||
session.add_torrent(
|
||||
AddTorrent::from_url(magnet(
|
||||
&h,
|
||||
|
|
@ -194,28 +189,20 @@ async fn main() -> Result<()> {
|
|||
session.unpause(&mt).await?;
|
||||
log::debug!("begin torrent `{h}` preload...");
|
||||
if let Err(e) = time::timeout(
|
||||
Duration::from_secs(config.wait_until_completed),
|
||||
Duration::from_secs(config.timeout),
|
||||
mt.wait_until_completed(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
let t = Local::now()
|
||||
+ Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||
log::debug!(
|
||||
"skip awaiting the completion of preload `{h}` data (`{e}`), ban until {t}."
|
||||
);
|
||||
assert!(ban.insert(i, t).is_none());
|
||||
session
|
||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||
.await?; // * do not collect billions of slow torrents in the session pool
|
||||
if let Some(wait_until_completed_ban) =
|
||||
config.wait_until_completed_ban
|
||||
{
|
||||
let t = Local::now()
|
||||
+ Duration::from_secs(wait_until_completed_ban);
|
||||
log::debug!(
|
||||
"skip awaiting the completion of preload `{h}` data (`{e}`), ban until {t}."
|
||||
);
|
||||
assert!(ban.insert(i, t).is_none())
|
||||
} else {
|
||||
log::debug!(
|
||||
"skip awaiting the completion of preload `{h}` data (`{e}`)"
|
||||
)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
log::debug!("torrent `{h}` preload completed.");
|
||||
|
|
@ -233,13 +220,15 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
Ok(_) => panic!(),
|
||||
Err(e) => {
|
||||
let t = Local::now() + Duration::from_secs(config.resolve_torrent_ban);
|
||||
let t = Local::now()
|
||||
+ Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||
log::debug!("failed to resolve torrent `{h}`: `{e}`, ban until {t}.");
|
||||
assert!(ban.insert(i, t).is_none())
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
let t = Local::now() + Duration::from_secs(config.add_torrent_ban);
|
||||
let t =
|
||||
Local::now() + Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||
log::debug!(
|
||||
"skip awaiting the completion of adding torrent `{h}` data (`{e}`), ban until {t}."
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue