From 8cbae5019d94288c3566dfb1aba88932524319ed Mon Sep 17 00:00:00 2001 From: yggverse Date: Mon, 4 Aug 2025 21:23:55 +0300 Subject: [PATCH] remove extra features --- Cargo.toml | 10 +- README.md | 168 +--------------------- src/config.rs | 49 ++----- src/format.rs | 24 ---- src/index.rs | 111 -------------- src/index/value.rs | 54 ------- src/main.rs | 351 ++++++++++----------------------------------- src/peers.rs | 21 --- src/preload.rs | 143 +++++++----------- src/rss.rs | 142 ------------------ src/torrent.rs | 32 ----- src/trackers.rs | 20 --- 12 files changed, 153 insertions(+), 972 deletions(-) delete mode 100644 src/format.rs delete mode 100644 src/index.rs delete mode 100644 src/index/value.rs delete mode 100644 src/peers.rs delete mode 100644 src/rss.rs delete mode 100644 src/torrent.rs delete mode 100644 src/trackers.rs diff --git a/Cargo.toml b/Cargo.toml index 4eca015..977642b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,25 +4,25 @@ version = "0.2.0" edition = "2024" license = "MIT" readme = "README.md" -description = "SSD-friendly crawler for the Aquatic BitTorrent tracker based on librqbit API" -keywords = ["aquatic", "librqbit", "rqbit", "crawler", "bittorrent"] +description = "SSD-friendly crawler for the Aquatic BitTorrent tracker, based on the librqbit API" +keywords = ["aquatic", "librqbit", "bittorrent", "crawler", "resolver"] categories = ["network-programming"] repository = "https://github.com/YGGverse/aquatic-crawler" # homepage = "https://yggverse.github.io" [dependencies] anyhow = "1.0" -chrono = "0.4.41" +chrono = "0.4" clap = { version = "4.5", features = ["derive"] } -hyper-util = "0.1" librqbit = {version = "9.0.0-beta.1", features = ["disable-upload"]} +log = "0.4" regex = "1.11" tokio = { version = "1.45", features = ["full"] } tracing-subscriber = "0.3" url = "2.5" urlencoding = "2.1" -voca_rs = "1.15" walkdir = "2.5" + [patch.crates-io] librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" } #librqbit = { version = "9.0.0-beta.1", path = "../../rqbit/crates/librqbit", package = "librqbit" } \ No newline at end of file diff --git a/README.md b/README.md index 269c939..745eb74 100644 --- a/README.md +++ b/README.md @@ -4,37 +4,11 @@ [![Dependencies](https://deps.rs/repo/github/YGGverse/aquatic-crawler/status.svg)](https://deps.rs/repo/github/YGGverse/aquatic-crawler) [![crates.io](https://img.shields.io/crates/v/aquatic-crawler.svg)](https://crates.io/crates/aquatic-crawler) -SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker based on [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API +SSD-friendly crawler for the [Aquatic](https://github.com/greatest-ape/aquatic) BitTorrent tracker, based on the [librqbit](https://github.com/ikatson/rqbit/tree/main/crates/librqbit) API > [!NOTE] -> Compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API)) - -## Conception - -See the project [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki) - -## Features - -> [!TIP] -> For details on all implemented features, see the [Options](#options) section - -* Info-hash versions - * [x] 1 - * [ ] 2 -* Import sources - * [x] IPv4 / IPv6 info-hash binary API (requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic)) - * [x] local file path - * [ ] remote URL -* Export options - * [x] Content (`--preload`) - * [x] data match the regex pattern (`--preload-regex`) - * [x] data match limits (see `--preload-*` options group) - * [x] Resolved `.torrent` files (`--export-torrents`) - * [x] RSS feed (`--export-rss`) includes resolved torrent meta and magnet links to download - * customize feed options with `--export-rss-*` options group - * [ ] [Gemtext](https://geminiprotocol.net/docs/gemtext.gmi) static files catalog - * [ ] [Manticore](https://github.com/manticoresoftware/manticoresearch-rust) full text search index - * [ ] SQLite database index +> * requires [PR#233](https://github.com/greatest-ape/aquatic/pull/233), see the [Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic) for more details +> * compatible with any other `--infohash` source in `hash1hash2...` binary format (see also the [Online API](https://github.com/YGGverse/aquatic-crawler/wiki/Online-API)) ## Install @@ -53,140 +27,12 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\ --infohash /path/to/another-source.bin\ --tracker udp://host1:port\ --tracker udp://host2:port\ - --preload /path/to/directory\ - --enable-tcp + --preload /path/to/directory ``` +* append `RUST_LOG=debug` to debug ### Options ``` bash - -d, --debug - Print debug output - - --infohash - Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API - * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic)) - - --tracker - Define custom tracker(s) to preload the `.torrent` files info - - --initial-peer - Define initial peer(s) to preload the `.torrent` files info - - --export-torrents - Save resolved torrent files to given directory - - --export-rss - File path to export RSS feed - - --export-rss-title - Custom title for RSS feed (channel) - - [default: aquatic-crawler] - - --export-rss-link - Custom link for RSS feed (channel) - - --export-rss-description - Custom description for RSS feed (channel) - - --export-trackers - Appends `--tracker` value to magnets and torrents - - --enable-dht - Enable DHT resolver - - --enable-tcp - Enable TCP connection - - --bind - Bind resolver session on specified device name (`tun0`, `mycelium`, etc.) - - --listen - Bind listener on specified `host:port` (`[host]:port` for IPv6) - - * this option is useful only for binding the data exchange service, - to restrict the outgoing connections for torrent resolver, use `bind` option instead - - --listen-upnp - Enable UPnP forwarding - - --enable-upload - Enable upload (share bytes received with BitTorrent network) - - --preload - Directory path to store preloaded data (e.g. `.torrent` files) - - --preload-clear - Clear previous data collected on crawl session start - - --preload-regex - Preload only files match regex pattern (list only without preload by default) - * see also `preload_max_filesize`, `preload_max_filecount` options - - ## Example: - - Filter by image ext ``` --preload-regex '(png|gif|jpeg|jpg|webp)$' ``` - - * requires `storage` argument defined - - --preload-total-size - Stop crawler on total preload files size reached - - --preload-max-filesize - Max size sum of preloaded files per torrent (match `preload_regex`) - - --preload-max-filecount - Max count of preloaded files per torrent (match `preload_regex`) - - --proxy-url - Use `socks5://[username:password@]host:port` - - --peer-connect-timeout - - - --peer-read-write-timeout - - - --peer-keep-alive-interval - - - --index-capacity - Estimated info-hash index capacity - - [default: 1000] - - --index-list - Index torrent files - - --index-list-limit - Limit torrent files quantity to index - * insert the `...` placeholder as the last item, with total size left - - [default: 100] - - --index-timeout - Remove records from index older than `seconds` - - --add-torrent-timeout - Max time to handle each torrent - - [default: 10] - - --sleep - Crawl loop delay in seconds - - [default: 300] - - --upload-limit - Limit upload speed (b/s) - - --download-limit - Limit download speed (b/s) - - -h, --help - Print help (see a summary with '-h') - - -V, --version - Print version -``` \ No newline at end of file +aquatic-crawler --help +``` diff --git a/src/config.rs b/src/config.rs index b6ac645..684b6a1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,12 +1,11 @@ use clap::Parser; +use regex::Regex; +use std::{net::SocketAddr, path::PathBuf}; +use url::Url; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] pub struct Config { - /// Print debug output - #[arg(short, long, default_value_t = false)] - pub debug: bool, - /// Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API /// /// * PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic)) @@ -15,31 +14,11 @@ pub struct Config { /// Define custom tracker(s) to preload the `.torrent` files info #[arg(long)] - pub tracker: Vec, + pub tracker: Vec, /// Define initial peer(s) to preload the `.torrent` files info #[arg(long)] - pub initial_peer: Vec, - - /// Save resolved torrent files to given directory - #[arg(long)] - pub export_torrents: Option, - - /// File path to export RSS feed - #[arg(long)] - pub export_rss: Option, - - /// Custom title for RSS feed (channel) - #[arg(long, default_value_t = String::from("aquatic-crawler"))] - pub export_rss_title: String, - - /// Custom link for RSS feed (channel) - #[arg(long)] - pub export_rss_link: Option, - - /// Custom description for RSS feed (channel) - #[arg(long)] - pub export_rss_description: Option, + pub initial_peer: Option>, /// Appends `--tracker` value to magnets and torrents #[arg(long, default_value_t = false)] @@ -49,9 +28,9 @@ pub struct Config { #[arg(long, default_value_t = false)] pub enable_dht: bool, - /// Enable TCP connection + /// Disable TCP connection #[arg(long, default_value_t = false)] - pub enable_tcp: bool, + pub disable_tcp: bool, /// Bind resolver session on specified device name (`tun0`, `mycelium`, etc.) #[arg(long)] @@ -74,11 +53,7 @@ pub struct Config { /// Directory path to store preloaded data (e.g. `.torrent` files) #[arg(long)] - pub preload: Option, - - /// Clear previous data collected on crawl session start - #[arg(long, default_value_t = false)] - pub preload_clear: bool, + pub preload: PathBuf, /// Preload only files match regex pattern (list only without preload by default) /// * see also `preload_max_filesize`, `preload_max_filecount` options @@ -92,11 +67,7 @@ pub struct Config { /// /// * requires `storage` argument defined #[arg(long)] - pub preload_regex: Option, - - /// Stop crawler on total preload files size reached - #[arg(long)] - pub preload_total_size: Option, + pub preload_regex: Option, /// Max size sum of preloaded files per torrent (match `preload_regex`) #[arg(long)] @@ -108,7 +79,7 @@ pub struct Config { /// Use `socks5://[username:password@]host:port` #[arg(long)] - pub proxy_url: Option, + pub proxy_url: Option, // Peer options #[arg(long)] diff --git a/src/format.rs b/src/format.rs deleted file mode 100644 index fa2d463..0000000 --- a/src/format.rs +++ /dev/null @@ -1,24 +0,0 @@ -pub trait Format { - /// Format bytes to KB/MB/GB presentation - fn bytes(self) -> String; -} - -impl Format for u64 { - fn bytes(self) -> String { - const KB: f32 = 1024.0; - const MB: f32 = KB * KB; - const GB: f32 = MB * KB; - - let f = self as f32; - - if f < KB { - format!("{self} B") - } else if f < MB { - format!("{:.2} KB", f / KB) - } else if f < GB { - format!("{:.2} MB", f / MB) - } else { - format!("{:.2} GB", f / GB) - } - } -} diff --git a/src/index.rs b/src/index.rs deleted file mode 100644 index 2969ccf..0000000 --- a/src/index.rs +++ /dev/null @@ -1,111 +0,0 @@ -mod value; - -use chrono::{Duration, Utc}; -use std::collections::HashMap; -use value::Value; - -/// Collect processed info hashes to skip on the next iterations (for this session) -/// * also contains optional meta info to export index as RSS or any other format -pub struct Index { - index: HashMap, - /// Removes outdated values from `index` on `Self::refresh` action - timeout: Option, - /// Track index changes to prevent extra disk write operations (safe SSD life) - /// * useful in the static RSS feed generation case, if enabled - is_changed: bool, - /// Store the index value in memory only when it is in use by the init options - has_name: bool, - has_size: bool, - has_list: bool, -} - -impl Index { - pub fn init( - capacity: usize, - timeout: Option, - has_name: bool, - has_size: bool, - has_list: bool, - ) -> Self { - Self { - index: HashMap::with_capacity(capacity), - timeout: timeout.map(Duration::seconds), - has_size, - has_name, - has_list, - is_changed: false, - } - } - - pub fn has(&self, infohash: &str) -> bool { - self.index.contains_key(infohash) - } - - pub fn is_changed(&self) -> bool { - self.is_changed - } - - pub fn list(&self) -> &HashMap { - &self.index - } - - pub fn len(&self) -> usize { - self.index.len() - } - - pub fn nodes(&self) -> u64 { - self.index.values().map(|i| i.node).sum::() - } - - pub fn insert( - &mut self, - infohash: String, - node: u64, - size: u64, - list: Option, u64)>>, - name: Option, - ) { - if self - .index - .insert( - infohash, - Value::new( - node, - if self.has_size { Some(size) } else { None }, - if self.has_name { name } else { None }, - if self.has_list { list } else { None }, - ), - ) - .is_none() - { - self.is_changed = true - } - } - - pub fn refresh(&mut self) { - if let Some(timeout) = self.timeout { - let t = Utc::now(); - self.index.retain(|_, v| t - v.time <= timeout) - } - self.is_changed = false - } -} - -#[test] -fn test() { - use std::{thread::sleep, time::Duration}; - - // test values auto-clean by timeout - let mut i = Index::init(2, Some(3), false, false, false); - - i.insert("h1".to_string(), 0, 0, None, None); - sleep(Duration::from_secs(1)); - i.insert("h2".to_string(), 0, 0, None, None); - - i.refresh(); - assert_eq!(i.len(), 2); - - sleep(Duration::from_secs(2)); - i.refresh(); - assert_eq!(i.len(), 1) -} diff --git a/src/index/value.rs b/src/index/value.rs deleted file mode 100644 index e10c306..0000000 --- a/src/index/value.rs +++ /dev/null @@ -1,54 +0,0 @@ -use chrono::{DateTime, Utc}; -use voca_rs::Voca; - -/// The `Index` value -pub struct Value { - pub time: DateTime, - pub node: u64, - // Isolate by applying internal filter on value set - size: Option, - name: Option, - list: Option, u64)>>, -} - -impl Value { - /// Create new `Self` with current timestamp - pub fn new( - node: u64, - size: Option, - name: Option, - list: Option, u64)>>, - ) -> Self { - Self { - time: Utc::now(), - node, - size, - list: list.map(|f| f.into_iter().map(|(n, l)| (filter(n), l)).collect()), - name: filter(name), - } - } - /// Get reference to the safely constructed `name` member - pub fn name(&self) -> Option<&String> { - self.name.as_ref() - } - /// Get reference to the safely constructed files `list` member - pub fn list(&self) -> Option<&Vec<(Option, u64)>> { - self.list.as_ref() - } - /// Get reference to the safely constructed `length` member - pub fn size(&self) -> Option { - self.size - } -} - -/// Strip tags and bom chars, crop long strings (prevents memory pool overload) -fn filter(value: Option) -> Option { - value.map(|v| { - const C: usize = 125; // + 3 for `...` offset, 128 chars max @TODO optional - let s = v._strip_bom()._strip_tags(); - if s.chars().count() > C { - return format!("{}...", s.chars().take(C).collect::()); - } - s - }) -} diff --git a/src/main.rs b/src/main.rs index 4c0e9d3..97d69d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,25 +1,15 @@ mod api; mod config; -mod format; -mod index; -mod peers; mod preload; -mod rss; -mod torrent; -mod trackers; use anyhow::Result; use config::Config; -use index::Index; use librqbit::{ - AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions, - ListenerOptions, PeerConnectionOptions, SessionOptions, ValidatedTorrentMetaV1Info, + AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions, ListenerOptions, + PeerConnectionOptions, SessionOptions, }; -use peers::Peers; -use rss::Rss; -use std::{collections::HashSet, num::NonZero, path::PathBuf, str::FromStr, time::Duration}; -use torrent::Torrent; -use trackers::Trackers; +use preload::Preload; +use std::{collections::HashSet, num::NonZero, str::FromStr, time::Duration}; use url::Url; #[tokio::main] @@ -27,29 +17,21 @@ async fn main() -> Result<()> { use chrono::Local; use clap::Parser; use tokio::time; - + // init debug + if std::env::var("RUST_LOG").is_ok() { + tracing_subscriber::fmt::init() + } // librqbit // init components let time_init = Local::now(); let config = Config::parse(); - if std::env::var("RUST_LOG").is_ok() { - tracing_subscriber::fmt::init() - } - let peers = Peers::init(&config.initial_peer)?; - let preload = preload::init( + let preload = Preload::init( config.preload, config.preload_regex, config.preload_max_filecount, config.preload_max_filesize, - config.preload_total_size, - config.preload_clear, )?; - let trackers = Trackers::init(&config.tracker)?; - let torrent = config.export_torrents.map(|p| Torrent::init(&p).unwrap()); let session = librqbit::Session::new_with_opts( - match preload { - Some(ref p) => p.path(), - None => PathBuf::new(), - }, + preload.root().clone(), SessionOptions { bind_device_name: config.bind, listen: match config.listen { @@ -66,8 +48,8 @@ async fn main() -> Result<()> { } }, connect: Some(ConnectionOptions { - enable_tcp: config.enable_tcp, - proxy_url: config.proxy_url, + enable_tcp: !config.disable_tcp, + proxy_url: config.proxy_url.map(|u| u.to_string()), peer_opts: Some(PeerConnectionOptions { connect_timeout: config.peer_connect_timeout.map(Duration::from_secs), read_write_timeout: config.peer_read_write_timeout.map(Duration::from_secs), @@ -82,54 +64,35 @@ async fn main() -> Result<()> { upload_bps: config.upload_limit.and_then(NonZero::new), download_bps: config.download_limit.and_then(NonZero::new), }, - trackers: trackers.list().clone(), + trackers: config.tracker.iter().cloned().collect(), ..SessionOptions::default() }, ) .await?; - - // begin - println!("Crawler started on {time_init}"); - let mut index = Index::init( - config.index_capacity, - config.index_timeout, - config.export_rss.is_some(), - config.export_rss.is_some(), - config.export_rss.is_some() && config.index_list, - ); + log::info!("Crawler started on {time_init}"); loop { let time_queue = Local::now(); - if config.debug { - println!("\tQueue crawl begin on {time_queue}...") - } - index.refresh(); + log::debug!("Queue crawl begin on {time_queue}..."); for source in &config.infohash { - if config.debug { - println!("\tIndex source `{source}`...") - } + log::debug!("Index source `{source}`..."); // grab latest info-hashes from this source // * aquatic server may update the stats at this moment, handle result manually for i in match api::get(source, config.index_capacity) { Some(i) => i, None => { // skip without panic - if config.debug { - eprintln!( - "The feed `{source}` has an incomplete format (or is still updating); skip." - ) - } + log::warn!( + "The feed `{source}` has an incomplete format (or is still updating); skip." + ); continue; } } { // convert to string once let i = i.to_string(); - // is already indexed? - if index.has(&i) { + if preload.contains_torrent(&i)? { continue; } - if config.debug { - println!("\t\tIndex `{i}`...") - } + log::debug!("Index `{i}`..."); // run the crawler in single thread for performance reasons, // use `timeout` argument option to skip the dead connections. match time::timeout( @@ -137,18 +100,18 @@ async fn main() -> Result<()> { session.add_torrent( AddTorrent::from_url(magnet( &i, - if config.export_trackers && !trackers.is_empty() { - Some(trackers.list()) - } else { + if config.tracker.is_empty() { None + } else { + Some(config.tracker.as_ref()) }, )), Some(AddTorrentOptions { paused: true, // continue after `only_files` init overwrite: true, - disable_trackers: trackers.is_empty(), - initial_peers: peers.initial_peers(), - list_only: preload.as_ref().is_none_or(|p| p.regex.is_none()), + disable_trackers: config.tracker.is_empty(), + initial_peers: config.initial_peer.clone(), + list_only: false, // it is important to blacklist all files preload until initiation only_files: Some(Vec::with_capacity( config.preload_max_filecount.unwrap_or_default(), @@ -156,8 +119,9 @@ async fn main() -> Result<()> { // the destination folder to preload files match `only_files_regex` // * e.g. images for audio albums output_folder: preload - .as_ref() - .map(|p| p.output_folder(&i, true).unwrap()), + .output_folder(&i)? + .to_str() + .map(|s| s.to_string()), ..Default::default() }), ), @@ -167,183 +131,79 @@ async fn main() -> Result<()> { Ok(r) => match r { // on `preload_regex` case only Ok(AddTorrentResponse::Added(id, mt)) => { - let mut only_files_size = 0; - let mut only_files_keep = Vec::with_capacity( + let mut keep_files = HashSet::with_capacity( config.preload_max_filecount.unwrap_or_default(), ); let mut only_files = HashSet::with_capacity( config.preload_max_filecount.unwrap_or_default(), ); mt.wait_until_initialized().await?; - let (name, size, list) = mt.with_metadata(|m| { - // init preload files list - if let Some(ref p) = preload { - for (id, info) in m.file_infos.iter().enumerate() { - if p.matches(info.relative_filename.to_str().unwrap()) { - if p.max_filesize.is_some_and(|limit| { - only_files_size + info.len > limit - }) { - if config.debug { - println!( - "\t\t\ttotal files size limit `{i}` reached!" - ) - } - break; - } - if p.max_filecount - .is_some_and(|limit| only_files.len() + 1 > limit) - { - if config.debug { - println!( - "\t\t\ttotal files count limit for `{i}` reached!" - ) - } - break; - } - only_files_size += info.len; - if let Some(ref p) = preload { - only_files_keep - .push(p.absolute(&i, &info.relative_filename)) - } - only_files.insert(id); - } + let bytes = mt.with_metadata(|m| { + for (id, info) in m.file_infos.iter().enumerate() { + if preload + .max_filecount + .is_some_and(|limit| only_files.len() + 1 > limit) + { + log::debug!( + "file count limit reached, skip `{id}` for `{i}`" + ); + break; } + if preload.max_filesize.is_some_and(|limit| info.len > limit) { + log::debug!( + "file size limit reached, skip `{id}` for `{i}`" + ); + continue; + } + if preload.regex.as_ref().is_some_and(|r| { + !r.is_match(&info.relative_filename.to_string_lossy()) + }) { + log::debug!("regex filter, skip `{id}` for `{i}`"); + continue; + } + assert!(keep_files.insert(info.relative_filename.clone())); + assert!(only_files.insert(id)); } - if let Some(ref t) = torrent { - save_torrent_file(t, &i, &m.torrent_bytes, config.debug) - } - - ( - m.info.name().as_ref().map(|n| n.to_string()), - size(&m.info), - list(&m.info, config.index_list_limit), - ) + m.torrent_bytes.to_vec() })?; session.update_only_files(&mt, &only_files).await?; session.unpause(&mt).await?; // await for `preload_regex` files download to continue mt.wait_until_completed().await?; + // cleanup irrelevant files (see rqbit#408) + preload.cleanup(&i, Some(keep_files))?; + preload.persist_torrent_bytes(&i, &bytes)?; // remove torrent from session as indexed session .delete(librqbit::api::TorrentIdOrHash::Id(id), false) .await?; - // cleanup irrelevant files (see rqbit#408) - if let Some(p) = &preload { - p.cleanup(&i, Some(only_files_keep))? - } - - if config.debug { - println!("\t\t\tadd `{i}` to index.") - } - - index.insert( - i, - only_files_size, - size, - list, - name.map(|n| n.to_string()), - ) + log::debug!("torrent `{i}` indexed.") } - Ok(AddTorrentResponse::ListOnly(r)) => { - if let Some(ref t) = torrent { - save_torrent_file(t, &i, &r.torrent_bytes, config.debug) - } - - // @TODO - // use `r.info` for Memory, SQLite, - // Manticore and other alternative storage type - - if config.debug { - println!("\t\t\tadd `{i}` to index.") - } - - index.insert( - i, - 0, - size(&r.info), - list(&r.info, config.index_list_limit), - r.info.name().map(|n| n.to_string()), - ) - } - // unexpected as should be deleted - Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(), - Err(e) => eprintln!("Failed to resolve `{i}`: `{e}`."), + Ok(_) => panic!(), + Err(e) => log::debug!("Failed to resolve `{i}`: `{e}`."), }, - Err(e) => { - if config.debug { - println!("\t\t\tfailed to resolve `{i}`: `{e}`") - } - } + Err(e) => log::debug!("failed to resolve `{i}`: `{e}`"), } } } - - if let Some(ref export_rss) = config.export_rss - && index.is_changed() - { - let mut rss = Rss::new( - export_rss, - &config.export_rss_title, - &config.export_rss_link, - &config.export_rss_description, - if config.export_trackers && !trackers.is_empty() { - Some(trackers.list().clone()) - } else { - None - }, - )?; - for (k, v) in index.list() { - rss.push( - k, - v.name().unwrap_or(k), - rss::item_description(v.size(), v.list()), - Some(&v.time.to_rfc2822()), - )? - } - rss.commit()? - } - if preload - .as_ref() - .is_some_and(|p| p.total_size.is_some_and(|s| index.nodes() > s)) - { - panic!("Preload content size {} bytes reached!", 0) - } - if config.debug { - println!( - "Queue completed on {time_queue}\n\ttotal: {}\n\ttime: {} s\n\tuptime: {} s\n\tawait {} seconds to continue...", - index.len(), - Local::now() - .signed_duration_since(time_queue) - .as_seconds_f32(), - Local::now() - .signed_duration_since(time_init) - .as_seconds_f32(), - config.sleep, - ) - } + log::debug!( + "Queue completed at {time_queue} (time: {} / uptime: {}) await {} seconds to continue...", + Local::now() + .signed_duration_since(time_queue) + .as_seconds_f32(), + Local::now() + .signed_duration_since(time_init) + .as_seconds_f32(), + config.sleep, + ); std::thread::sleep(Duration::from_secs(config.sleep)) } } -/// Shared handler function to save resolved torrents as file -fn save_torrent_file(t: &Torrent, i: &str, b: &[u8], d: bool) { - match t.persist(i, b) { - Ok(r) => { - if d { - match r { - Some(p) => println!("\t\t\tadd torrent file `{}`", p.to_string_lossy()), - None => println!("\t\t\ttorrent file `{i}` already exists"), - } - } - } - Err(e) => eprintln!("Error on save torrent file `{i}`: `{e}`"), - } -} - /// Build magnet URI -fn magnet(infohash: &str, trackers: Option<&HashSet>) -> String { - let mut m = if infohash.len() == 40 { - format!("magnet:?xt=urn:btih:{infohash}") +fn magnet(info_hash: &str, trackers: Option<&Vec>) -> String { + let mut m = if info_hash.len() == 40 { + format!("magnet:?xt=urn:btih:{info_hash}") } else { todo!("infohash v2 is not supported by librqbit") }; @@ -355,62 +215,3 @@ fn magnet(infohash: &str, trackers: Option<&HashSet>) -> String { } m } - -/// Count total size, including torrent files -fn size(meta: &ValidatedTorrentMetaV1Info) -> u64 { - let mut t = 0; - if let Some(l) = meta.info().length { - t += l - } - if let Some(ref files) = meta.info().files { - for f in files { - t += f.length - } - } - t -} - -fn list( - meta: &ValidatedTorrentMetaV1Info, - limit: usize, -) -> Option, u64)>> { - meta.info().files.as_ref().map(|files| { - let mut b = Vec::with_capacity(files.len()); - let mut i = files.iter(); - let mut t = 0; - for f in i.by_ref() { - if t < limit { - t += 1; - b.push(( - String::from_utf8( - f.path - .iter() - .enumerate() - .flat_map(|(n, b)| { - if n == 0 { - b.0.to_vec() - } else { - let mut p = vec![b'/']; - p.extend(b.0.to_vec()); - p - } - }) - .collect(), - ) - .ok(), - f.length, - )); - continue; - } - // limit reached: count sizes left and use placeholder as the last item name - let mut l = 0; - for f in i.by_ref() { - l += f.length - } - b.push((Some("...".to_string()), l)); - break; - } - b[..t].sort_by(|a, b| a.0.cmp(&b.0)); // @TODO optional - b - }) -} diff --git a/src/peers.rs b/src/peers.rs deleted file mode 100644 index f43da62..0000000 --- a/src/peers.rs +++ /dev/null @@ -1,21 +0,0 @@ -use std::{net::SocketAddr, str::FromStr}; - -pub struct Peers(Vec); - -impl Peers { - pub fn init(peers: &Vec) -> anyhow::Result { - let mut p = Vec::with_capacity(peers.len()); - for peer in peers { - p.push(SocketAddr::from_str(peer)?); - } - Ok(Self(p)) - } - - pub fn initial_peers(&self) -> Option> { - if self.0.is_empty() { - None - } else { - Some(self.0.clone()) - } - } -} diff --git a/src/preload.rs b/src/preload.rs index 3d9e213..439d453 100644 --- a/src/preload.rs +++ b/src/preload.rs @@ -1,123 +1,90 @@ use anyhow::{Result, bail}; use regex::Regex; -use std::{fs, path::PathBuf, str::FromStr}; +use std::{collections::HashSet, fs, path::PathBuf}; pub struct Preload { - path: PathBuf, + root: PathBuf, pub max_filecount: Option, pub max_filesize: Option, - pub total_size: Option, pub regex: Option, } impl Preload { - fn init( - directory: &str, - regex: Option, + // Constructors + + pub fn init( + root: PathBuf, + regex: Option, max_filecount: Option, max_filesize: Option, - total_size: Option, - is_clear: bool, ) -> Result { - let path = PathBuf::from_str(directory)?; - if let Ok(t) = fs::metadata(&path) { - if t.is_file() { - bail!("Storage destination is not directory!"); - } - if t.is_dir() && is_clear { - for i in fs::read_dir(&path)? { - let r = i?.path(); - if r.is_dir() { - fs::remove_dir_all(&r)?; - } else { - fs::remove_file(&r)?; - } - } - } + if !root.is_dir() { + bail!("Preload root is not directory") } - fs::create_dir_all(&path)?; Ok(Self { max_filecount, max_filesize, - path, - regex: regex.map(|r| Regex::new(&r).unwrap()), - total_size, + regex, + root: root.canonicalize()?, }) } - pub fn output_folder(&self, infohash: &str, create: bool) -> Result { - let mut p = PathBuf::new(); - p.push(&self.path); - p.push(infohash); - if p.is_file() { - bail!("File destination is not directory!"); - } - if create { - fs::create_dir_all(&p)?; - } - if !p.is_dir() { - bail!("Destination directory not exists!") - } - Ok(p.to_string_lossy().to_string()) - } - - pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf { - let mut p = PathBuf::new(); - p.push(&self.path); - p.push(infohash); - p.push(file); - p - } + // Actions /// Recursively remove all files under the `infohash` location (see rqbit#408) - pub fn cleanup(&self, infohash: &str, keep_filenames: Option>) -> Result<()> { - for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) { + pub fn cleanup(&self, info_hash: &str, keep_filenames: Option>) -> Result<()> { + for e in walkdir::WalkDir::new(self.output_folder(info_hash)?) { let e = e?; let p = e.into_path(); if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) { fs::remove_file(p)?; } - } + } // remove empty directories @TODO Ok(()) } - pub fn path(&self) -> PathBuf { - self.path.clone() + pub fn persist_torrent_bytes(&self, info_hash: &str, contents: &[u8]) -> Result { + let p = self.torrent(info_hash)?; + fs::write(&p, contents)?; + Ok(p) } - pub fn matches(&self, pattern: &str) -> bool { - self.regex.as_ref().is_some_and(|r| r.is_match(pattern)) - } -} + // Getters -/// Init `Preload` with validate related argument options -pub fn init( - path: Option, - regex: Option, - max_filecount: Option, - max_filesize: Option, - total_size: Option, - is_clear: bool, -) -> Result> { - Ok(match path { - Some(ref p) => Some(Preload::init( - p, - regex, - max_filecount, - max_filesize, - total_size, - is_clear, - )?), - None => { - if regex.is_some() - || max_filecount.is_some() - || max_filesize.is_some() - || total_size.is_some() - || is_clear - { - bail!("`--preload` directory is required for this configuration!") - } - None + /// * creates new directory if not exists + pub fn output_folder(&self, info_hash: &str) -> Result { + if !is_info_hash(info_hash) { + bail!("Invalid info-hash `{info_hash}`") } - }) + let mut p = PathBuf::from(&self.root); + p.push(info_hash); + if p.is_file() { + bail!("Output directory for info-hash `{info_hash}` is file") + } + if !p.exists() { + fs::create_dir(&p)? + } + Ok(p) + } + + pub fn root(&self) -> &PathBuf { + &self.root + } + + pub fn contains_torrent(&self, info_hash: &str) -> Result { + Ok(fs::exists(self.torrent(info_hash)?)?) + } + + fn torrent(&self, info_hash: &str) -> Result { + if !is_info_hash(info_hash) { + bail!("Invalid info-hash `{info_hash}`") + } + let mut p = PathBuf::from(&self.root); + p.push(format!("{info_hash}.torrent")); + Ok(p) + } +} + +fn is_info_hash(value: &str) -> bool { + value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit()) } diff --git a/src/rss.rs b/src/rss.rs deleted file mode 100644 index 56dec6d..0000000 --- a/src/rss.rs +++ /dev/null @@ -1,142 +0,0 @@ -use anyhow::{Result, bail}; -use std::{collections::HashSet, fs::File, io::Write, path::PathBuf, str::FromStr}; -use url::Url; - -/// Export crawl index to the RSS file -pub struct Rss { - /// Resulting (public) file in the XML format - file: File, - /// Shared directory for the feed `file` and its `tmp` buffer file - target: PathBuf, - /// Creates temporary file to exclude feed format damage on update - tmp: PathBuf, - /// Trackers source for every item in channel - trackers: Option>, -} - -impl Rss { - /// Create writable file for given `filepath` - pub fn new( - filepath: &str, - title: &str, - link: &Option, - description: &Option, - trackers: Option>, - ) -> Result { - // prevent from reading of the incomplete file - let tmp = PathBuf::from_str(&format!("{filepath}.tmp"))?; - - // init public destination - let target = PathBuf::from_str(filepath)?; - if target.is_dir() { - bail!("RSS path `{}` is directory", target.to_string_lossy()) - } - // init temporary file to write - let mut file = File::create(&tmp)?; - - file.write_all( - b"", - )?; - - let t = chrono::Utc::now().to_rfc2822(); - file.write_all(b"")?; - file.write_all(t.as_bytes())?; - file.write_all(b"")?; - file.write_all(b"")?; - file.write_all(t.as_bytes())?; - file.write_all(b"")?; - - file.write_all(b"")?; - file.write_all(escape(title).as_bytes())?; - file.write_all(b"")?; - - if let Some(s) = description { - file.write_all(b"")?; - file.write_all(escape(s).as_bytes())?; - file.write_all(b"")? - } - - if let Some(s) = link { - file.write_all(b"")?; - file.write_all(escape(s).as_bytes())?; - file.write_all(b"")? - } - - Ok(Self { - file, - target, - trackers, - tmp, - }) - } - - /// Append `item` to the feed `channel` - pub fn push( - &mut self, - infohash: &str, - title: &str, - description: Option, - pub_date: Option<&str>, - ) -> Result<()> { - self.file.write_all( - format!( - "{infohash}{}{}", - escape(title), - escape(&crate::magnet(infohash, self.trackers.as_ref())) - ) - .as_bytes(), - )?; - if let Some(s) = description { - self.file.write_all(b"")?; - self.file.write_all(escape(&s).as_bytes())?; - self.file.write_all(b"")? - } - if let Some(s) = pub_date { - self.file.write_all(b"")?; - self.file.write_all(escape(s).as_bytes())?; - self.file.write_all(b"")? - } - self.file.write_all(b"")?; - Ok(()) - } - - /// Write final bytes, replace public file with temporary one - pub fn commit(mut self) -> Result<()> { - self.file.write_all(b"")?; - std::fs::rename(self.tmp, self.target)?; - Ok(()) - } -} - -pub fn item_description( - size: Option, - list: Option<&Vec<(Option, u64)>>, -) -> Option { - use crate::format::Format; - if size.is_none() && list.is_none() { - return None; - } - let mut b = Vec::with_capacity(list.map(|l| l.len()).unwrap_or_default() + 1); - if let Some(s) = size { - b.push(s.bytes()) - } - if let Some(l) = list { - for (path, size) in l { - b.push(format!( - "{} ({})", - path.as_deref().unwrap_or("?"), // @TODO invalid encoding - size.bytes() - )) - } - } - Some(b.join("\n")) -} - -fn escape(subject: &str) -> String { - subject - .replace('&', "&") - .replace('<', "<") - .replace('>', ">") - .replace('"', """) - .replace("'", "'") -} diff --git a/src/torrent.rs b/src/torrent.rs deleted file mode 100644 index fefabb8..0000000 --- a/src/torrent.rs +++ /dev/null @@ -1,32 +0,0 @@ -use anyhow::Result; -use std::{fs, io::Write, path::PathBuf, str::FromStr}; - -pub struct Torrent { - storage: PathBuf, -} - -impl Torrent { - pub fn init(path: &str) -> Result { - Ok(Self { - storage: PathBuf::from_str(path)?.canonicalize()?, - }) - } - - pub fn persist(&self, infohash: &str, data: &[u8]) -> Result> { - Ok(if self.path(infohash).exists() { - None - } else { - let p = self.path(infohash); - let mut f = fs::File::create(&p)?; - f.write_all(data)?; - Some(p) - }) - } - - fn path(&self, infohash: &str) -> PathBuf { - let mut p = PathBuf::new(); - p.push(&self.storage); - p.push(format!("{infohash}.torrent")); - p - } -} diff --git a/src/trackers.rs b/src/trackers.rs deleted file mode 100644 index d01c1e2..0000000 --- a/src/trackers.rs +++ /dev/null @@ -1,20 +0,0 @@ -use std::{collections::HashSet, str::FromStr}; -use url::Url; - -pub struct Trackers(HashSet); - -impl Trackers { - pub fn init(trackers: &Vec) -> anyhow::Result { - let mut t = HashSet::with_capacity(trackers.len()); - for tracker in trackers { - t.insert(Url::from_str(tracker)?); - } - Ok(Self(t)) - } - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - pub fn list(&self) -> &HashSet { - &self.0 - } -}