mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
implement clear, torrent_tracker, disable_dht arguments
This commit is contained in:
parent
bd6cc95bf3
commit
1da27bfffe
5 changed files with 62 additions and 15 deletions
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "aquatic-crawler"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
|
@ -18,3 +18,4 @@ librqbit = "8.1.0"
|
|||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
tokio = { version = "1.45", features = ["full"] }
|
||||
url = "2.5.4"
|
||||
|
|
|
|||
20
README.md
20
README.md
|
|
@ -21,9 +21,6 @@ Crawler/aggregation tool for the [Aquatic](https://github.com/greatest-ape/aquat
|
|||
* [ ] V2
|
||||
* [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search
|
||||
* [ ] SQLite
|
||||
* Tools
|
||||
* [ ] Storage cleaner
|
||||
* [ ] Implement tests
|
||||
|
||||
## Install
|
||||
|
||||
|
|
@ -53,12 +50,23 @@ Options:
|
|||
|
||||
[default: ei]
|
||||
|
||||
-i, --infohash-source <INFOHASH_SOURCE>
|
||||
Filepath(s) to the Aquatic tracker info-hash JSON/API (PR#233)
|
||||
-c, --clear
|
||||
Clear previous index collected on crawl session start
|
||||
|
||||
-t, --torrents-path <TORRENTS_PATH>
|
||||
-i, --infohash-source <INFOHASH_SOURCE>
|
||||
Filepath(s) to the Aquatic tracker info-hash JSON/API
|
||||
|
||||
* PR#233 feature
|
||||
|
||||
--torrents-path <TORRENTS_PATH>
|
||||
Directory path to store the `.torrent` files
|
||||
|
||||
--torrent-tracker <TORRENT_TRACKER>
|
||||
Define custom tracker(s) to preload the `.torrent` files info
|
||||
|
||||
--disable-dht
|
||||
Disable DHT resolver (useful with `torrent_tracker`)
|
||||
|
||||
-s, --sleep <SLEEP>
|
||||
Crawl loop delay in seconds
|
||||
|
||||
|
|
|
|||
|
|
@ -10,16 +10,28 @@ pub struct Argument {
|
|||
#[arg(short, long, default_value_t = String::from("ei"))]
|
||||
pub debug: String,
|
||||
|
||||
/// Clear previous index collected on crawl session start
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
pub clear: bool,
|
||||
|
||||
/// Filepath(s) to the Aquatic tracker info-hash JSON/API
|
||||
///
|
||||
/// * PR #233 info-hash table implementation has multiple source tables for IPv4 and IPv6
|
||||
/// * PR#233 feature
|
||||
#[arg(short, long)]
|
||||
pub infohash_source: Vec<String>,
|
||||
|
||||
/// Directory path to store the `.torrent` files
|
||||
#[arg(short, long)]
|
||||
#[arg(long)]
|
||||
pub torrents_path: Option<String>,
|
||||
|
||||
/// Define custom tracker(s) to preload the `.torrent` files info
|
||||
#[arg(long)]
|
||||
pub torrent_tracker: Vec<String>,
|
||||
|
||||
/// Disable DHT resolver (useful with `torrent_tracker`)
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub disable_dht: bool,
|
||||
|
||||
/// Crawl loop delay in seconds
|
||||
#[arg(short, long, default_value_t = 300)]
|
||||
pub sleep: u64,
|
||||
|
|
|
|||
|
|
@ -4,12 +4,17 @@ use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
|||
pub struct Storage(PathBuf);
|
||||
|
||||
impl Storage {
|
||||
pub fn init(storage: &str) -> Result<Self> {
|
||||
pub fn init(storage: &str, clear: bool) -> Result<Self> {
|
||||
let p = PathBuf::from_str(storage)?;
|
||||
if fs::metadata(&p).is_ok_and(|t| t.is_file()) {
|
||||
if let Ok(t) = fs::metadata(&p) {
|
||||
if t.is_file() {
|
||||
bail!("Target destination is not directory!")
|
||||
}
|
||||
fs::create_dir_all(storage)?;
|
||||
if t.is_dir() && clear {
|
||||
fs::remove_dir_all(&p)?;
|
||||
}
|
||||
}
|
||||
fs::create_dir_all(&p)?;
|
||||
Ok(Self(p))
|
||||
}
|
||||
|
||||
|
|
|
|||
25
src/main.rs
25
src/main.rs
|
|
@ -6,6 +6,9 @@ mod debug;
|
|||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
use clap::Parser;
|
||||
use librqbit::SessionOptions;
|
||||
use std::str::FromStr;
|
||||
|
||||
let argument = argument::Argument::parse();
|
||||
|
||||
// calculate debug level once
|
||||
|
|
@ -14,11 +17,21 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
// init shared members
|
||||
let torrent_storage = if let Some(t) = argument.torrents_path {
|
||||
Some(database::torrent::Storage::init(&t)?)
|
||||
let s = database::torrent::Storage::init(&t, argument.clear)?;
|
||||
if argument.clear && is_debug_i {
|
||||
debug::info(String::from("Cleanup torrent storage"));
|
||||
}
|
||||
Some(s)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut trackers = std::collections::HashSet::with_capacity(argument.torrent_tracker.len());
|
||||
for tracker in argument.torrent_tracker {
|
||||
trackers.insert(url::Url::from_str(&tracker)?);
|
||||
}
|
||||
|
||||
// begin
|
||||
if is_debug_i {
|
||||
debug::info(String::from("Crawler started"));
|
||||
}
|
||||
|
|
@ -28,7 +41,15 @@ async fn main() -> anyhow::Result<()> {
|
|||
debug::info(String::from("New index session begin..."));
|
||||
}
|
||||
let mut total = 0;
|
||||
let session = librqbit::Session::new(std::path::PathBuf::new()).await?;
|
||||
let session = librqbit::Session::new_with_opts(
|
||||
std::path::PathBuf::new(),
|
||||
SessionOptions {
|
||||
disable_dht: argument.disable_dht,
|
||||
trackers: trackers.clone(),
|
||||
..SessionOptions::default()
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
// collect info-hashes from API
|
||||
for source in &argument.infohash_source {
|
||||
if is_debug_i {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue