implement preload_max_filesize, preload_max_filecount options

This commit is contained in:
yggverse 2025-06-16 01:59:22 +03:00
parent 7de77b8575
commit 4f39dc3d0a
4 changed files with 78 additions and 16 deletions

View file

@ -86,7 +86,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
Enable upload
--preload-regex <PRELOAD_REGEX>
Preload files match regex pattern (list only without preload by default)
Preload only files matching the regex pattern (list only without preload by default) * see also `preload_max_filesize`, `preload_max_filecount` options
## Example:
@ -94,6 +94,12 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
* requires `storage` argument defined
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
Max size sum of preloaded files per torrent (match `preload_regex`)
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
Max count of preloaded files per torrent (match `preload_regex`)
--save-torrents
Save resolved torrent files to the `storage` location

View file

@ -49,7 +49,8 @@ pub struct Argument {
#[arg(long, default_value_t = false)]
pub enable_upload: bool,
/// Preload files match regex pattern (list only without preload by default)
/// Preload only files match regex pattern (list only without preload by default)
/// * see also `preload_max_filesize`, `preload_max_filecount` options
///
/// ## Example:
///
@ -62,6 +63,14 @@ pub struct Argument {
#[arg(long)]
pub preload_regex: Option<String>,
/// Max size sum of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filesize: Option<u64>,
/// Max count of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filecount: Option<usize>,
/// Save resolved torrent files to the `storage` location
#[arg(long, default_value_t = true)]
pub save_torrents: bool,

View file

@ -78,14 +78,16 @@ async fn main() -> Result<()> {
session.add_torrent(
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
Some(AddTorrentOptions {
paused: true, // continue after `only_files` init
overwrite: true,
disable_trackers: trackers.is_empty(),
initial_peers: peers.initial_peers(),
list_only: preload_regex.is_none(),
// it is important to blacklist all files preload until initiation
only_files: Some(Vec::new()),
// the destination folder to preload files match `only_files_regex`
// * e.g. images for audio albums
output_folder: storage.output_folder(&i, true).ok(),
only_files_regex: preload_regex.as_ref().map(|r| r.to_string()),
..Default::default()
}),
),
@ -95,18 +97,57 @@ async fn main() -> Result<()> {
Ok(r) => match r {
// on `preload_regex` case only
Ok(AddTorrentResponse::Added(id, mt)) => {
if arg.save_torrents {
let mut only_files_size = 0;
let mut only_files_save = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
let mut only_files = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
mt.wait_until_initialized().await?;
mt.with_metadata(|m| {
// init preload files list
if let Some(ref regex) = preload_regex {
for (id, info) in m.file_infos.iter().enumerate() {
if regex.is_match(
info.relative_filename.to_str().unwrap(),
) {
if arg.preload_max_filesize.is_some_and(
|limit| only_files_size + info.len > limit,
) {
debug.info(&format!(
"Total files size limit `{i}` reached!"
));
break;
}
if arg.preload_max_filecount.is_some_and(
|limit| only_files.len() + 1 > limit,
) {
debug.info(&format!(
"Total files count limit for `{i}` reached!"
));
break;
}
only_files_size += info.len;
only_files_save.insert(storage.absolute(&i, &info.relative_filename));
only_files.insert(id);
}
}
}
// dump info-hash to the torrent file
if arg.save_torrents {
save_torrent_file(
&storage,
&debug,
&i,
&m.torrent_bytes,
)
}
// @TODO
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
})?;
}
session.update_only_files(&mt, &only_files).await?;
session.unpause(&mt).await?;
// await for `preload_regex` files download to continue
match time::timeout(
Duration::from_secs(arg.download_torrent_timeout),
@ -126,9 +167,7 @@ async fn main() -> Result<()> {
)
.await?;
// cleanup irrelevant files (see rqbit#408)
if let Some(r) = preload_regex.as_ref() {
storage.cleanup(&i, Some(r))?;
}
storage.cleanup(&i, Some(only_files_save))?;
// ignore on the next crawl iterations for this session
index.insert(i);
}

View file

@ -1,5 +1,5 @@
use anyhow::{Result, bail};
use std::{fs, io::Write, path::PathBuf, str::FromStr};
use std::{
    collections::HashSet,
    fs,
    io::Write,
    path::{Path, PathBuf},
    str::FromStr,
};
/// Filesystem storage root: wraps the base directory under which
/// per-infohash subdirectories, preloaded files, and saved torrent
/// files are written.
pub struct Storage(PathBuf);
@ -53,12 +53,20 @@ impl Storage {
Ok(p.to_string_lossy().to_string())
}
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
let mut p = PathBuf::new();
p.push(&self.0);
p.push(infohash);
p.push(file);
p
}
/// Recursively remove all files under the `infohash` location (see rqbit#408)
pub fn cleanup(&self, infohash: &str, skip_filename: Option<&regex::Regex>) -> Result<()> {
pub fn cleanup(&self, infohash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
let e = e?;
let p = e.path();
if p.is_file() && skip_filename.is_none_or(|r| !r.is_match(p.to_str().unwrap())) {
let p = e.into_path();
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
fs::remove_file(p)?;
}
}