mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
implement dynamic ban time calculation, remove extra argument options
This commit is contained in:
parent
b45d1de0b9
commit
9cd28eaa3b
2 changed files with 23 additions and 58 deletions
|
|
@ -74,52 +74,28 @@ pub struct Config {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub preload_max_filecount: Option<usize>,
|
pub preload_max_filecount: Option<usize>,
|
||||||
|
|
||||||
|
/// Limit download speed (b/s)
|
||||||
|
#[arg(long)]
|
||||||
|
pub download_limit: Option<u32>, // * reminder: upload feature is not planned by the crawler impl
|
||||||
|
|
||||||
/// Use `socks5://[username:password@]host:port`
|
/// Use `socks5://[username:password@]host:port`
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub proxy_url: Option<Url>,
|
pub proxy_url: Option<Url>,
|
||||||
|
|
||||||
// Tuneup the peers processor
|
|
||||||
#[arg(long)]
|
|
||||||
pub peer_connect_timeout: Option<u64>,
|
|
||||||
|
|
||||||
#[arg(long)]
|
|
||||||
pub peer_read_write_timeout: Option<u64>,
|
|
||||||
|
|
||||||
#[arg(long)]
|
|
||||||
pub peer_keep_alive_interval: Option<u64>,
|
|
||||||
|
|
||||||
/// Estimated info-hash index capacity
|
/// Estimated info-hash index capacity
|
||||||
///
|
///
|
||||||
/// * use for memory optimization, depending on tracker volumes
|
/// * use for memory optimization, depending on tracker volumes
|
||||||
#[arg(long, default_value_t = 1000)]
|
#[arg(long, default_value_t = 1000)]
|
||||||
pub index_capacity: usize,
|
pub index_capacity: usize,
|
||||||
|
|
||||||
/// Max time in seconds to add new torrent
|
|
||||||
#[arg(long, default_value_t = 60)]
|
|
||||||
pub add_torrent_timeout: u64,
|
|
||||||
|
|
||||||
/// Ban time in seconds on torrent add failure (`add_torrent_timeout` is reached)
|
|
||||||
#[arg(long, default_value_t = 3600)]
|
|
||||||
pub add_torrent_ban: u64,
|
|
||||||
|
|
||||||
/// Ban time in seconds on torrent resolve failure
|
|
||||||
#[arg(long, default_value_t = 3600)]
|
|
||||||
pub resolve_torrent_ban: u64,
|
|
||||||
|
|
||||||
/// Crawl loop delay in seconds
|
/// Crawl loop delay in seconds
|
||||||
#[arg(long, default_value_t = 60)]
|
#[arg(long, default_value_t = 60)]
|
||||||
pub sleep: u64,
|
pub sleep: u64,
|
||||||
|
|
||||||
/// Limit download speed (b/s)
|
/// Skip and ban slow or unresolvable hashes
|
||||||
#[arg(long)]
|
/// when the specified value in seconds is reached
|
||||||
pub download_limit: Option<u32>, // * reminder: upload feature is not planned by the crawler impl
|
///
|
||||||
|
/// * the ban time is dynamically calculated based on the current ban list collected
|
||||||
/// Skip long-thinking connections,
|
|
||||||
/// try to handle the other hashes in this queue after `n` seconds
|
|
||||||
#[arg(long, default_value_t = 60)]
|
#[arg(long, default_value_t = 60)]
|
||||||
pub wait_until_completed: u64,
|
pub timeout: u64,
|
||||||
|
|
||||||
/// Ban time in seconds on torrent data download is longer than `wait_until_completed`
|
|
||||||
#[arg(long)]
|
|
||||||
pub wait_until_completed_ban: Option<u64>,
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
37
src/main.rs
37
src/main.rs
|
|
@ -6,8 +6,7 @@ use anyhow::Result;
|
||||||
use chrono::DateTime;
|
use chrono::DateTime;
|
||||||
use config::Config;
|
use config::Config;
|
||||||
use librqbit::{
|
use librqbit::{
|
||||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, PeerConnectionOptions,
|
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, SessionOptions,
|
||||||
SessionOptions,
|
|
||||||
};
|
};
|
||||||
use preload::Preload;
|
use preload::Preload;
|
||||||
use std::{
|
use std::{
|
||||||
|
|
@ -44,11 +43,7 @@ async fn main() -> Result<()> {
|
||||||
connect: Some(ConnectionOptions {
|
connect: Some(ConnectionOptions {
|
||||||
enable_tcp: !config.disable_tcp,
|
enable_tcp: !config.disable_tcp,
|
||||||
proxy_url: config.proxy_url.map(|u| u.to_string()),
|
proxy_url: config.proxy_url.map(|u| u.to_string()),
|
||||||
peer_opts: Some(PeerConnectionOptions {
|
..ConnectionOptions::default()
|
||||||
connect_timeout: config.peer_connect_timeout.map(Duration::from_secs),
|
|
||||||
read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs),
|
|
||||||
keep_alive_interval: config.peer_keep_alive_interval.map(Duration::from_secs),
|
|
||||||
}),
|
|
||||||
}),
|
}),
|
||||||
disable_dht_persistence: true,
|
disable_dht_persistence: true,
|
||||||
disable_dht: !config.enable_dht,
|
disable_dht: !config.enable_dht,
|
||||||
|
|
@ -112,7 +107,7 @@ async fn main() -> Result<()> {
|
||||||
// run the crawler in single thread for performance reasons,
|
// run the crawler in single thread for performance reasons,
|
||||||
// use `timeout` argument option to skip the dead connections.
|
// use `timeout` argument option to skip the dead connections.
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
Duration::from_secs(config.add_torrent_timeout),
|
Duration::from_secs(config.timeout),
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
AddTorrent::from_url(magnet(
|
AddTorrent::from_url(magnet(
|
||||||
&h,
|
&h,
|
||||||
|
|
@ -194,28 +189,20 @@ async fn main() -> Result<()> {
|
||||||
session.unpause(&mt).await?;
|
session.unpause(&mt).await?;
|
||||||
log::debug!("begin torrent `{h}` preload...");
|
log::debug!("begin torrent `{h}` preload...");
|
||||||
if let Err(e) = time::timeout(
|
if let Err(e) = time::timeout(
|
||||||
Duration::from_secs(config.wait_until_completed),
|
Duration::from_secs(config.timeout),
|
||||||
mt.wait_until_completed(),
|
mt.wait_until_completed(),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
|
||||||
session
|
|
||||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
|
||||||
.await?; // * do not collect billions of slow torrents in the session pool
|
|
||||||
if let Some(wait_until_completed_ban) =
|
|
||||||
config.wait_until_completed_ban
|
|
||||||
{
|
{
|
||||||
let t = Local::now()
|
let t = Local::now()
|
||||||
+ Duration::from_secs(wait_until_completed_ban);
|
+ Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||||
log::debug!(
|
log::debug!(
|
||||||
"skip awaiting the completion of preload `{h}` data (`{e}`), ban until {t}."
|
"skip awaiting the completion of preload `{h}` data (`{e}`), ban until {t}."
|
||||||
);
|
);
|
||||||
assert!(ban.insert(i, t).is_none())
|
assert!(ban.insert(i, t).is_none());
|
||||||
} else {
|
session
|
||||||
log::debug!(
|
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||||
"skip awaiting the completion of preload `{h}` data (`{e}`)"
|
.await?; // * do not collect billions of slow torrents in the session pool
|
||||||
)
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
log::debug!("torrent `{h}` preload completed.");
|
log::debug!("torrent `{h}` preload completed.");
|
||||||
|
|
@ -233,13 +220,15 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
Ok(_) => panic!(),
|
Ok(_) => panic!(),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let t = Local::now() + Duration::from_secs(config.resolve_torrent_ban);
|
let t = Local::now()
|
||||||
|
+ Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||||
log::debug!("failed to resolve torrent `{h}`: `{e}`, ban until {t}.");
|
log::debug!("failed to resolve torrent `{h}`: `{e}`, ban until {t}.");
|
||||||
assert!(ban.insert(i, t).is_none())
|
assert!(ban.insert(i, t).is_none())
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let t = Local::now() + Duration::from_secs(config.add_torrent_ban);
|
let t =
|
||||||
|
Local::now() + Duration::from_secs(ban.len() as u64 * config.timeout);
|
||||||
log::debug!(
|
log::debug!(
|
||||||
"skip awaiting the completion of adding torrent `{h}` data (`{e}`), ban until {t}."
|
"skip awaiting the completion of adding torrent `{h}` data (`{e}`), ban until {t}."
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue