implement preload_max_filesize, preload_max_filecount options

This commit is contained in:
yggverse 2025-06-16 01:59:22 +03:00
parent 7de77b8575
commit 4f39dc3d0a
4 changed files with 78 additions and 16 deletions

View file

@ -86,7 +86,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
Enable upload
--preload-regex <PRELOAD_REGEX>
Preload files match regex pattern (list only without preload by default) Preload only files match regex pattern (list only without preload by default) * see also `preload_max_filesize`, `preload_max_filecount` options
## Example:
@ -94,6 +94,12 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
* requires `storage` argument defined
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
Max size sum of preloaded files per torrent (match `preload_regex`)
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
Max count of preloaded files per torrent (match `preload_regex`)
--save-torrents
Save resolved torrent files to the `storage` location

View file

@ -49,7 +49,8 @@ pub struct Argument {
#[arg(long, default_value_t = false)]
pub enable_upload: bool,
/// Preload files match regex pattern (list only without preload by default) /// Preload only files match regex pattern (list only without preload by default)
/// * see also `preload_max_filesize`, `preload_max_filecount` options
///
/// ## Example:
///
@ -62,6 +63,14 @@ pub struct Argument {
#[arg(long)]
pub preload_regex: Option<String>,
/// Max size sum of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filesize: Option<u64>,
/// Max count of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filecount: Option<usize>,
/// Save resolved torrent files to the `storage` location
#[arg(long, default_value_t = true)]
pub save_torrents: bool,

View file

@ -78,14 +78,16 @@ async fn main() -> Result<()> {
session.add_torrent(
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
Some(AddTorrentOptions {
paused: true, // continue after `only_files` init
overwrite: true,
disable_trackers: trackers.is_empty(),
initial_peers: peers.initial_peers(),
list_only: preload_regex.is_none(),
// it is important to blacklist all files preload until initiation
only_files: Some(Vec::new()),
// the destination folder to preload files match `only_files_regex`
// * e.g. images for audio albums
output_folder: storage.output_folder(&i, true).ok(),
only_files_regex: preload_regex.as_ref().map(|r| r.to_string()),
..Default::default()
}),
),
@ -95,18 +97,57 @@ async fn main() -> Result<()> {
Ok(r) => match r {
// on `preload_regex` case only
Ok(AddTorrentResponse::Added(id, mt)) => {
if arg.save_torrents { let mut only_files_size = 0;
mt.with_metadata(|m| { let mut only_files_save = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
let mut only_files = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
mt.wait_until_initialized().await?;
mt.with_metadata(|m| {
// init preload files list
if let Some(ref regex) = preload_regex {
for (id, info) in m.file_infos.iter().enumerate() {
if regex.is_match(
info.relative_filename.to_str().unwrap(),
) {
if arg.preload_max_filesize.is_some_and(
|limit| only_files_size + info.len > limit,
) {
debug.info(&format!(
"Total files size limit `{i}` reached!"
));
break;
}
if arg.preload_max_filecount.is_some_and(
|limit| only_files.len() + 1 > limit,
) {
debug.info(&format!(
"Total files count limit for `{i}` reached!"
));
break;
}
only_files_size += info.len;
only_files_save.insert(storage.absolute(&i, &info.relative_filename));
only_files.insert(id);
}
}
}
// dump info-hash to the torrent file
if arg.save_torrents {
save_torrent_file(
&storage,
&debug,
&i,
&m.torrent_bytes,
)
// @TODO }
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type // @TODO
})?; // use `r.info` for Memory, SQLite, Manticore and other alternative storage type
} })?;
session.update_only_files(&mt, &only_files).await?;
session.unpause(&mt).await?;
// await for `preload_regex` files download to continue
match time::timeout(
Duration::from_secs(arg.download_torrent_timeout),
@ -126,9 +167,7 @@ async fn main() -> Result<()> {
)
.await?;
// cleanup irrelevant files (see rqbit#408)
if let Some(r) = preload_regex.as_ref() { storage.cleanup(&i, Some(only_files_save))?;
storage.cleanup(&i, Some(r))?;
}
// ignore on the next crawl iterations for this session
index.insert(i);
}

View file

@ -1,5 +1,5 @@
use anyhow::{Result, bail};
use std::{fs, io::Write, path::PathBuf, str::FromStr}; use std::{collections::HashSet, fs, io::Write, path::PathBuf, str::FromStr};
pub struct Storage(PathBuf);
@ -53,12 +53,20 @@ impl Storage {
Ok(p.to_string_lossy().to_string())
}
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
let mut p = PathBuf::new();
p.push(&self.0);
p.push(infohash);
p.push(file);
p
}
/// Recursively remove all files under the `infohash` location (see rqbit#408)
pub fn cleanup(&self, infohash: &str, skip_filename: Option<&regex::Regex>) -> Result<()> { pub fn cleanup(&self, infohash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
let e = e?;
let p = e.path(); let p = e.into_path();
if p.is_file() && skip_filename.is_none_or(|r| !r.is_match(p.to_str().unwrap())) { if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
fs::remove_file(p)?;
}
}