implement preload_max_filesize, preload_max_filecount options

This commit is contained in:
yggverse 2025-06-16 01:59:22 +03:00
parent 7de77b8575
commit 4f39dc3d0a
4 changed files with 78 additions and 16 deletions

View file

@ -86,7 +86,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
Enable upload
--preload-regex <PRELOAD_REGEX>
Preload files match regex pattern (list only without preload by default)
Preload only files matching the regex pattern (list only without preload by default) * see also `preload_max_filesize`, `preload_max_filecount` options
## Example:
@ -94,6 +94,12 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
* requires `storage` argument defined
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
Max size sum of preloaded files per torrent (match `preload_regex`)
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
Max count of preloaded files per torrent (match `preload_regex`)
--save-torrents
Save resolved torrent files to the `storage` location

View file

@ -49,7 +49,8 @@ pub struct Argument {
#[arg(long, default_value_t = false)]
pub enable_upload: bool,
/// Preload files match regex pattern (list only without preload by default)
/// Preload only files match regex pattern (list only without preload by default)
/// * see also `preload_max_filesize`, `preload_max_filecount` options
///
/// ## Example:
///
@ -62,6 +63,14 @@ pub struct Argument {
#[arg(long)]
pub preload_regex: Option<String>,
/// Max size sum of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filesize: Option<u64>,
/// Max count of preloaded files per torrent (match `preload_regex`)
#[arg(long)]
pub preload_max_filecount: Option<usize>,
/// Save resolved torrent files to the `storage` location
#[arg(long, default_value_t = true)]
pub save_torrents: bool,

View file

@ -78,14 +78,16 @@ async fn main() -> Result<()> {
session.add_torrent(
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
Some(AddTorrentOptions {
paused: true, // continue after `only_files` init
overwrite: true,
disable_trackers: trackers.is_empty(),
initial_peers: peers.initial_peers(),
list_only: preload_regex.is_none(),
// it is important to blacklist all files preload until initiation
only_files: Some(Vec::new()),
// the destination folder to preload files match `only_files_regex`
// * e.g. images for audio albums
output_folder: storage.output_folder(&i, true).ok(),
only_files_regex: preload_regex.as_ref().map(|r| r.to_string()),
..Default::default()
}),
),
@ -95,18 +97,57 @@ async fn main() -> Result<()> {
Ok(r) => match r {
// on `preload_regex` case only
Ok(AddTorrentResponse::Added(id, mt)) => {
if arg.save_torrents {
let mut only_files_size = 0;
let mut only_files_save = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
let mut only_files = HashSet::with_capacity(
arg.preload_max_filecount.unwrap_or_default(),
);
mt.wait_until_initialized().await?;
mt.with_metadata(|m| {
// init preload files list
if let Some(ref regex) = preload_regex {
for (id, info) in m.file_infos.iter().enumerate() {
if regex.is_match(
info.relative_filename.to_str().unwrap(),
) {
if arg.preload_max_filesize.is_some_and(
|limit| only_files_size + info.len > limit,
) {
debug.info(&format!(
"Total files size limit `{i}` reached!"
));
break;
}
if arg.preload_max_filecount.is_some_and(
|limit| only_files.len() + 1 > limit,
) {
debug.info(&format!(
"Total files count limit for `{i}` reached!"
));
break;
}
only_files_size += info.len;
only_files_save.insert(storage.absolute(&i, &info.relative_filename));
only_files.insert(id);
}
}
}
// dump info-hash to the torrent file
if arg.save_torrents {
save_torrent_file(
&storage,
&debug,
&i,
&m.torrent_bytes,
)
}
// @TODO
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
})?;
}
session.update_only_files(&mt, &only_files).await?;
session.unpause(&mt).await?;
// await for `preload_regex` files download to continue
match time::timeout(
Duration::from_secs(arg.download_torrent_timeout),
@ -126,9 +167,7 @@ async fn main() -> Result<()> {
)
.await?;
// cleanup irrelevant files (see rqbit#408)
if let Some(r) = preload_regex.as_ref() {
storage.cleanup(&i, Some(r))?;
}
storage.cleanup(&i, Some(only_files_save))?;
// ignore on the next crawl iterations for this session
index.insert(i);
}

View file

@ -1,5 +1,5 @@
use anyhow::{Result, bail};
use std::{fs, io::Write, path::PathBuf, str::FromStr};
use std::{
    collections::HashSet,
    fs,
    io::Write,
    path::{Path, PathBuf},
    str::FromStr,
};
/// Filesystem storage root: wraps the base directory under which
/// per-infohash subdirectories, preloaded files, and saved torrent
/// files are written.
pub struct Storage(PathBuf);
@ -53,12 +53,20 @@ impl Storage {
Ok(p.to_string_lossy().to_string())
}
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
let mut p = PathBuf::new();
p.push(&self.0);
p.push(infohash);
p.push(file);
p
}
/// Recursively remove all files under the `infohash` location (see rqbit#408)
pub fn cleanup(&self, infohash: &str, skip_filename: Option<&regex::Regex>) -> Result<()> {
pub fn cleanup(&self, infohash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
let e = e?;
let p = e.path();
if p.is_file() && skip_filename.is_none_or(|r| !r.is_match(p.to_str().unwrap())) {
let p = e.into_path();
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
fs::remove_file(p)?;
}
}