mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
implement temporary data cleanup on commit preload content
This commit is contained in:
parent
6f4d2894b3
commit
3e053fa806
3 changed files with 58 additions and 27 deletions
|
|
@ -21,7 +21,6 @@ tokio = { version = "1.45", features = ["full"] }
|
||||||
tracing-subscriber = "0.3"
|
tracing-subscriber = "0.3"
|
||||||
url = "2.5"
|
url = "2.5"
|
||||||
urlencoding = "2.1"
|
urlencoding = "2.1"
|
||||||
walkdir = "2.5"
|
|
||||||
|
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
librqbit = { git = "https://github.com/ikatson/rqbit.git", rev="b580a9610ae7c6eaacd305a3905f7e2d3202ca69", package = "librqbit" }
|
||||||
|
|
|
||||||
13
src/main.rs
13
src/main.rs
|
|
@ -116,12 +116,9 @@ async fn main() -> Result<()> {
|
||||||
only_files: Some(Vec::with_capacity(
|
only_files: Some(Vec::with_capacity(
|
||||||
config.preload_max_filecount.unwrap_or_default(),
|
config.preload_max_filecount.unwrap_or_default(),
|
||||||
)),
|
)),
|
||||||
// the destination folder to preload files match `only_files_regex`
|
// the destination folder to preload files match `preload_regex`
|
||||||
// * e.g. images for audio albums
|
// * e.g. images for audio albums
|
||||||
output_folder: preload
|
output_folder: preload.tmp(&i)?.to_str().map(|s| s.to_string()),
|
||||||
.output_folder(&i)?
|
|
||||||
.to_str()
|
|
||||||
.map(|s| s.to_string()),
|
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
|
|
@ -170,9 +167,9 @@ async fn main() -> Result<()> {
|
||||||
session.unpause(&mt).await?;
|
session.unpause(&mt).await?;
|
||||||
// await for `preload_regex` files download to continue
|
// await for `preload_regex` files download to continue
|
||||||
mt.wait_until_completed().await?;
|
mt.wait_until_completed().await?;
|
||||||
// cleanup irrelevant files (see rqbit#408)
|
// persist torrent bytes and preloaded content,
|
||||||
preload.cleanup(&i, Some(keep_files))?;
|
// cleanup tmp (see rqbit#408)
|
||||||
preload.persist_torrent_bytes(&i, &bytes)?;
|
preload.commit(&i, &bytes, Some(keep_files))?;
|
||||||
// remove torrent from session as indexed
|
// remove torrent from session as indexed
|
||||||
session
|
session
|
||||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||||
|
|
|
||||||
|
|
@ -31,35 +31,66 @@ impl Preload {
|
||||||
|
|
||||||
// Actions
|
// Actions
|
||||||
|
|
||||||
/// Recursively remove all files under the `infohash` location (see rqbit#408)
|
/// Persist torrent bytes and preloaded content, cleanup tmp (see rqbit#408)
|
||||||
pub fn cleanup(&self, info_hash: &str, keep_filenames: Option<HashSet<PathBuf>>) -> Result<()> {
|
pub fn commit(
|
||||||
for e in walkdir::WalkDir::new(self.output_folder(info_hash)?) {
|
&self,
|
||||||
let e = e?;
|
info_hash: &str,
|
||||||
let p = e.into_path();
|
torrent_bytes: &[u8],
|
||||||
if p.is_file() && keep_filenames.as_ref().is_none_or(|k| !k.contains(&p)) {
|
persist_files: Option<HashSet<PathBuf>>,
|
||||||
fs::remove_file(p)?;
|
) -> Result<()> {
|
||||||
|
// persist torrent bytes to file
|
||||||
|
fs::write(self.torrent(info_hash)?, torrent_bytes)?;
|
||||||
|
// persist preload files
|
||||||
|
let mut d = PathBuf::from(&self.root);
|
||||||
|
d.push(info_hash);
|
||||||
|
if d.exists() {
|
||||||
|
// clean previous data
|
||||||
|
fs::remove_dir_all(&d)?
|
||||||
|
}
|
||||||
|
if let Some(f) = persist_files {
|
||||||
|
let r = d.components().count(); // count root offset once
|
||||||
|
for p in f {
|
||||||
|
// make sure preload path is referring to the expected location
|
||||||
|
let o = p.canonicalize()?;
|
||||||
|
if !o.starts_with(&self.root) || o.is_dir() {
|
||||||
|
bail!("Unexpected canonical path `{}`", o.to_string_lossy())
|
||||||
|
}
|
||||||
|
// build new permanent path /root/info-hash
|
||||||
|
let mut n = PathBuf::from(&d);
|
||||||
|
for component in o.components().skip(r) {
|
||||||
|
n.push(component)
|
||||||
|
}
|
||||||
|
// make sure segments count is same to continue
|
||||||
|
if o.components().count() != n.components().count() {
|
||||||
|
bail!(
|
||||||
|
"Unexpected components count: `{}` > `{}`",
|
||||||
|
o.to_string_lossy(),
|
||||||
|
n.to_string_lossy(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
// fs::create_dir_all(n.parent().unwrap())?;
|
||||||
|
fs::rename(o, n)?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// cleanup temporary files
|
||||||
|
let t = self.tmp(info_hash)?;
|
||||||
|
if t.exists() {
|
||||||
|
fs::remove_dir_all(t)?
|
||||||
}
|
}
|
||||||
} // remove empty directories @TODO
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn persist_torrent_bytes(&self, info_hash: &str, contents: &[u8]) -> Result<PathBuf> {
|
|
||||||
let p = self.torrent(info_hash)?;
|
|
||||||
fs::write(&p, contents)?;
|
|
||||||
Ok(p)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Getters
|
// Getters
|
||||||
|
|
||||||
/// * creates new directory if not exists
|
/// * creates new temporary directory if not exists
|
||||||
pub fn output_folder(&self, info_hash: &str) -> Result<PathBuf> {
|
pub fn tmp(&self, info_hash: &str) -> Result<PathBuf> {
|
||||||
if !is_info_hash(info_hash) {
|
if !is_info_hash(info_hash) {
|
||||||
bail!("Invalid info-hash `{info_hash}`")
|
bail!("Invalid info-hash `{info_hash}`")
|
||||||
}
|
}
|
||||||
let mut p = PathBuf::from(&self.root);
|
let mut p = PathBuf::from(&self.root);
|
||||||
p.push(info_hash);
|
p.push(tmp(info_hash));
|
||||||
if p.is_file() {
|
if p.is_file() {
|
||||||
bail!("Output directory for info-hash `{info_hash}` is file")
|
bail!("Output directory `{}` is file", p.to_string_lossy())
|
||||||
}
|
}
|
||||||
if !p.exists() {
|
if !p.exists() {
|
||||||
fs::create_dir(&p)?
|
fs::create_dir(&p)?
|
||||||
|
|
@ -88,3 +119,7 @@ impl Preload {
|
||||||
fn is_info_hash(value: &str) -> bool {
|
fn is_info_hash(value: &str) -> bool {
|
||||||
value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit())
|
value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn tmp(info_hash: &str) -> String {
|
||||||
|
format!(".{info_hash}")
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue