From 4f39dc3d0ab6f52fd96c155f82d8892be9bd8ee7 Mon Sep 17 00:00:00 2001 From: yggverse Date: Mon, 16 Jun 2025 01:59:22 +0300 Subject: [PATCH] implement `preload_max_filesize`, `preload_max_filecount` options --- README.md | 8 ++++++- src/argument.rs | 11 ++++++++- src/main.rs | 59 ++++++++++++++++++++++++++++++++++++++++--------- src/storage.rs | 16 ++++++++++---- 4 files changed, 78 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index ee3e903..b56793d 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ Enable upload --preload-regex - Preload files match regex pattern (list only without preload by default) + Preload only files matching regex pattern (list only without preload by default) * see also `preload_max_filesize`, `preload_max_filecount` options ## Example: @@ -94,6 +94,12 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\ * requires `storage` argument defined +--preload-max-filesize + Max total size of preloaded files per torrent (matching `preload_regex`) + +--preload-max-filecount + Max count of preloaded files per torrent (matching `preload_regex`) + --save-torrents Save resolved torrent files to the `storage` location diff --git a/src/argument.rs b/src/argument.rs index a6a344d..55eec13 100644 --- a/src/argument.rs +++ b/src/argument.rs @@ -49,7 +49,8 @@ pub struct Argument { #[arg(long, default_value_t = false)] pub enable_upload: bool, - /// Preload files match regex pattern (list only without preload by default) + /// Preload only files matching regex pattern (list only without preload by default) + /// * see also `preload_max_filesize`, `preload_max_filecount` options /// /// ## Example: /// @@ -62,6 +63,14 @@ pub struct Argument { #[arg(long)] pub preload_regex: Option, + /// Max total size of preloaded files per torrent (matching `preload_regex`) + #[arg(long)] + pub preload_max_filesize: Option, + /// Max count of preloaded files per torrent (matching `preload_regex`)
+ #[arg(long)] + pub preload_max_filecount: Option, + /// Save resolved torrent files to the `storage` location #[arg(long, default_value_t = true)] pub save_torrents: bool, diff --git a/src/main.rs b/src/main.rs index 54a76a7..99f61e5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,14 +78,16 @@ async fn main() -> Result<()> { session.add_torrent( AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")), Some(AddTorrentOptions { + paused: true, // continue after `only_files` init overwrite: true, disable_trackers: trackers.is_empty(), initial_peers: peers.initial_peers(), list_only: preload_regex.is_none(), + // it is important to blacklist all files preload until initiation + only_files: Some(Vec::new()), // the destination folder to preload files match `only_files_regex` // * e.g. images for audio albums output_folder: storage.output_folder(&i, true).ok(), - only_files_regex: preload_regex.as_ref().map(|r| r.to_string()), ..Default::default() }), ), @@ -95,18 +97,57 @@ async fn main() -> Result<()> { Ok(r) => match r { // on `preload_regex` case only Ok(AddTorrentResponse::Added(id, mt)) => { - if arg.save_torrents { - mt.with_metadata(|m| { + let mut only_files_size = 0; + let mut only_files_save = HashSet::with_capacity( + arg.preload_max_filecount.unwrap_or_default(), + ); + let mut only_files = HashSet::with_capacity( + arg.preload_max_filecount.unwrap_or_default(), + ); + mt.wait_until_initialized().await?; + mt.with_metadata(|m| { + // init preload files list + if let Some(ref regex) = preload_regex { + for (id, info) in m.file_infos.iter().enumerate() { + if regex.is_match( + info.relative_filename.to_str().unwrap(), + ) { + if arg.preload_max_filesize.is_some_and( + |limit| only_files_size + info.len > limit, + ) { + debug.info(&format!( + "Total files size limit `{i}` reached!" 
+ )); + break; + } + if arg.preload_max_filecount.is_some_and( + |limit| only_files.len() + 1 > limit, + ) { + debug.info(&format!( + "Total files count limit for `{i}` reached!" + )); + break; + } + only_files_size += info.len; + only_files_save.insert(storage.absolute(&i, &info.relative_filename)); + only_files.insert(id); + } + } + } + // dump info-hash to the torrent file + if arg.save_torrents { save_torrent_file( &storage, &debug, &i, &m.torrent_bytes, ) - // @TODO - // use `r.info` for Memory, SQLite, Manticore and other alternative storage type - })?; - } + } + // @TODO + // use `r.info` for Memory, SQLite, Manticore and other alternative storage type + })?; + session.update_only_files(&mt, &only_files).await?; + session.unpause(&mt).await?; // await for `preload_regex` files download to continue match time::timeout( Duration::from_secs(arg.download_torrent_timeout), @@ -126,9 +167,7 @@ async fn main() -> Result<()> { ) .await?; // cleanup irrelevant files (see rqbit#408) - if let Some(r) = preload_regex.as_ref() { - storage.cleanup(&i, Some(r))?; - } + storage.cleanup(&i, Some(only_files_save))?; // ignore on the next crawl iterations for this session index.insert(i); } diff --git a/src/storage.rs b/src/storage.rs index 2929ff5..65c892c 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -1,5 +1,5 @@ use anyhow::{Result, bail}; -use std::{fs, io::Write, path::PathBuf, str::FromStr}; +use std::{collections::HashSet, fs, io::Write, path::PathBuf, str::FromStr}; pub struct Storage(PathBuf); @@ -53,12 +53,20 @@ impl Storage { Ok(p.to_string_lossy().to_string()) } + pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf { + let mut p = PathBuf::new(); + p.push(&self.0); + p.push(infohash); + p.push(file); + p + } + /// Recursively remove all files under the `infohash` location (see rqbit#408) - pub fn cleanup(&self, infohash: &str, skip_filename: Option<®ex::Regex>) -> Result<()> { + pub fn cleanup(&self, infohash: &str, keep_filenames: Option>) -> 
Result<()> { for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) { let e = e?; - let p = e.path(); - if p.is_file() && skip_filename.is_none_or(|r| !r.is_match(p.to_str().unwrap())) { + let p = e.into_path(); + if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) { fs::remove_file(p)?; } }