mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
implement preload_max_filesize, preload_max_filecount options
This commit is contained in:
parent
7de77b8575
commit
4f39dc3d0a
4 changed files with 78 additions and 16 deletions
|
|
@ -86,7 +86,7 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
Enable upload
|
Enable upload
|
||||||
|
|
||||||
--preload-regex <PRELOAD_REGEX>
|
--preload-regex <PRELOAD_REGEX>
|
||||||
Preload files match regex pattern (list only without preload by default)
|
Preload only files match regex pattern (list only without preload by default) * see also `preload_max_filesize`, `preload_max_filecount` options
|
||||||
|
|
||||||
## Example:
|
## Example:
|
||||||
|
|
||||||
|
|
@ -94,6 +94,12 @@ aquatic-crawler --infohash-file /path/to/info-hash-ipv4.json\
|
||||||
|
|
||||||
* requires `storage` argument defined
|
* requires `storage` argument defined
|
||||||
|
|
||||||
|
--preload-max-filesize <PRELOAD_MAX_FILESIZE>
|
||||||
|
Max size sum of preloaded files per torrent (match `preload_regex`)
|
||||||
|
|
||||||
|
--preload-max-filecount <PRELOAD_MAX_FILECOUNT>
|
||||||
|
Max count of preloaded files per torrent (match `preload_regex`)
|
||||||
|
|
||||||
--save-torrents
|
--save-torrents
|
||||||
Save resolved torrent files to the `storage` location
|
Save resolved torrent files to the `storage` location
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,8 @@ pub struct Argument {
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
pub enable_upload: bool,
|
pub enable_upload: bool,
|
||||||
|
|
||||||
/// Preload files match regex pattern (list only without preload by default)
|
/// Preload only files match regex pattern (list only without preload by default)
|
||||||
|
/// * see also `preload_max_filesize`, `preload_max_filecount` options
|
||||||
///
|
///
|
||||||
/// ## Example:
|
/// ## Example:
|
||||||
///
|
///
|
||||||
|
|
@ -62,6 +63,14 @@ pub struct Argument {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub preload_regex: Option<String>,
|
pub preload_regex: Option<String>,
|
||||||
|
|
||||||
|
/// Max size sum of preloaded files per torrent (match `preload_regex`)
|
||||||
|
#[arg(long)]
|
||||||
|
pub preload_max_filesize: Option<u64>,
|
||||||
|
|
||||||
|
/// Max count of preloaded files per torrent (match `preload_regex`)
|
||||||
|
#[arg(long)]
|
||||||
|
pub preload_max_filecount: Option<usize>,
|
||||||
|
|
||||||
/// Save resolved torrent files to the `storage` location
|
/// Save resolved torrent files to the `storage` location
|
||||||
#[arg(long, default_value_t = true)]
|
#[arg(long, default_value_t = true)]
|
||||||
pub save_torrents: bool,
|
pub save_torrents: bool,
|
||||||
|
|
|
||||||
59
src/main.rs
59
src/main.rs
|
|
@ -78,14 +78,16 @@ async fn main() -> Result<()> {
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
|
AddTorrent::from_url(format!("magnet:?xt=urn:btih:{i}")),
|
||||||
Some(AddTorrentOptions {
|
Some(AddTorrentOptions {
|
||||||
|
paused: true, // continue after `only_files` init
|
||||||
overwrite: true,
|
overwrite: true,
|
||||||
disable_trackers: trackers.is_empty(),
|
disable_trackers: trackers.is_empty(),
|
||||||
initial_peers: peers.initial_peers(),
|
initial_peers: peers.initial_peers(),
|
||||||
list_only: preload_regex.is_none(),
|
list_only: preload_regex.is_none(),
|
||||||
|
// it is important to blacklist all files preload until initiation
|
||||||
|
only_files: Some(Vec::new()),
|
||||||
// the destination folder to preload files match `only_files_regex`
|
// the destination folder to preload files match `only_files_regex`
|
||||||
// * e.g. images for audio albums
|
// * e.g. images for audio albums
|
||||||
output_folder: storage.output_folder(&i, true).ok(),
|
output_folder: storage.output_folder(&i, true).ok(),
|
||||||
only_files_regex: preload_regex.as_ref().map(|r| r.to_string()),
|
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
|
|
@ -95,18 +97,57 @@ async fn main() -> Result<()> {
|
||||||
Ok(r) => match r {
|
Ok(r) => match r {
|
||||||
// on `preload_regex` case only
|
// on `preload_regex` case only
|
||||||
Ok(AddTorrentResponse::Added(id, mt)) => {
|
Ok(AddTorrentResponse::Added(id, mt)) => {
|
||||||
if arg.save_torrents {
|
let mut only_files_size = 0;
|
||||||
mt.with_metadata(|m| {
|
let mut only_files_save = HashSet::with_capacity(
|
||||||
|
arg.preload_max_filecount.unwrap_or_default(),
|
||||||
|
);
|
||||||
|
let mut only_files = HashSet::with_capacity(
|
||||||
|
arg.preload_max_filecount.unwrap_or_default(),
|
||||||
|
);
|
||||||
|
mt.wait_until_initialized().await?;
|
||||||
|
mt.with_metadata(|m| {
|
||||||
|
// init preload files list
|
||||||
|
if let Some(ref regex) = preload_regex {
|
||||||
|
for (id, info) in m.file_infos.iter().enumerate() {
|
||||||
|
if regex.is_match(
|
||||||
|
info.relative_filename.to_str().unwrap(),
|
||||||
|
) {
|
||||||
|
if arg.preload_max_filesize.is_some_and(
|
||||||
|
|limit| only_files_size + info.len > limit,
|
||||||
|
) {
|
||||||
|
debug.info(&format!(
|
||||||
|
"Total files size limit `{i}` reached!"
|
||||||
|
));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if arg.preload_max_filecount.is_some_and(
|
||||||
|
|limit| only_files.len() + 1 > limit,
|
||||||
|
) {
|
||||||
|
debug.info(&format!(
|
||||||
|
"Total files count limit for `{i}` reached!"
|
||||||
|
));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
only_files_size += info.len;
|
||||||
|
only_files_save.insert(storage.absolute(&i, &info.relative_filename));
|
||||||
|
only_files.insert(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// dump info-hash to the torrent file
|
||||||
|
if arg.save_torrents {
|
||||||
save_torrent_file(
|
save_torrent_file(
|
||||||
&storage,
|
&storage,
|
||||||
&debug,
|
&debug,
|
||||||
&i,
|
&i,
|
||||||
&m.torrent_bytes,
|
&m.torrent_bytes,
|
||||||
)
|
)
|
||||||
// @TODO
|
}
|
||||||
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
// @TODO
|
||||||
})?;
|
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
||||||
}
|
})?;
|
||||||
|
session.update_only_files(&mt, &only_files).await?;
|
||||||
|
session.unpause(&mt).await?;
|
||||||
// await for `preload_regex` files download to continue
|
// await for `preload_regex` files download to continue
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
Duration::from_secs(arg.download_torrent_timeout),
|
Duration::from_secs(arg.download_torrent_timeout),
|
||||||
|
|
@ -126,9 +167,7 @@ async fn main() -> Result<()> {
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
// cleanup irrelevant files (see rqbit#408)
|
// cleanup irrelevant files (see rqbit#408)
|
||||||
if let Some(r) = preload_regex.as_ref() {
|
storage.cleanup(&i, Some(only_files_save))?;
|
||||||
storage.cleanup(&i, Some(r))?;
|
|
||||||
}
|
|
||||||
// ignore on the next crawl iterations for this session
|
// ignore on the next crawl iterations for this session
|
||||||
index.insert(i);
|
index.insert(i);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
use std::{fs, io::Write, path::PathBuf, str::FromStr};
|
use std::{collections::HashSet, fs, io::Write, path::PathBuf, str::FromStr};
|
||||||
|
|
||||||
pub struct Storage(PathBuf);
|
pub struct Storage(PathBuf);
|
||||||
|
|
||||||
|
|
@ -53,12 +53,20 @@ impl Storage {
|
||||||
Ok(p.to_string_lossy().to_string())
|
Ok(p.to_string_lossy().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn absolute(&self, infohash: &str, file: &PathBuf) -> PathBuf {
|
||||||
|
let mut p = PathBuf::new();
|
||||||
|
p.push(&self.0);
|
||||||
|
p.push(infohash);
|
||||||
|
p.push(file);
|
||||||
|
p
|
||||||
|
}
|
||||||
|
|
||||||
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
||||||
pub fn cleanup(&self, infohash: &str, skip_filename: Option<®ex::Regex>) -> Result<()> {
|
pub fn cleanup(&self, infohash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
|
||||||
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
|
for e in walkdir::WalkDir::new(self.output_folder(infohash, false)?) {
|
||||||
let e = e?;
|
let e = e?;
|
||||||
let p = e.path();
|
let p = e.into_path();
|
||||||
if p.is_file() && skip_filename.is_none_or(|r| !r.is_match(p.to_str().unwrap())) {
|
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
|
||||||
fs::remove_file(p)?;
|
fs::remove_file(p)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue