mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
use librqbit Id20 impl to validate info-hash bytes, require valid type for public methods
This commit is contained in:
parent
d61af4b970
commit
704a2e5c29
4 changed files with 24 additions and 55 deletions
|
|
@ -1,11 +1,10 @@
|
|||
mod info_hash;
|
||||
use info_hash::InfoHash;
|
||||
use librqbit::dht::Id20;
|
||||
|
||||
/// Parse infohash from the source filepath,
|
||||
/// decode hash bytes to a list of `Id20` values on success.
|
||||
///
|
||||
/// * return `None` if the `path` is not reachable
|
||||
pub fn get(path: &str, capacity: usize) -> Option<Vec<InfoHash>> {
|
||||
pub fn get(path: &str, capacity: usize) -> Option<Vec<Id20>> {
|
||||
use std::io::Read;
|
||||
if !path.ends_with(".bin") {
|
||||
todo!("Only sources in the `.bin` format are supported!")
|
||||
|
|
@ -21,7 +20,7 @@ pub fn get(path: &str, capacity: usize) -> Option<Vec<InfoHash>> {
|
|||
if f.read(&mut b).ok()? != L {
|
||||
break;
|
||||
}
|
||||
r.push(InfoHash::V1(b))
|
||||
r.push(Id20::from_bytes(&b).ok()?)
|
||||
}
|
||||
Some(r)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +0,0 @@
|
|||
/// Torrent info-hash holder.
///
/// Only the 20-byte `V1` form exists; the value is kept as raw bytes and
/// rendered as text through the `Display` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InfoHash {
    /// Raw 20-byte info-hash
    V1([u8; 20]),
}

impl std::fmt::Display for InfoHash {
    /// Format the hash as 40 lowercase hexadecimal characters
    /// (two zero-padded digits per byte, no separators).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // write every byte straight into the formatter: no temporary
            // `String` per byte and no intermediate collected buffer
            Self::V1(bytes) => bytes.iter().try_for_each(|b| write!(f, "{b:02x}")),
        }
    }
}
|
||||
24
src/main.rs
24
src/main.rs
|
|
@ -89,18 +89,18 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
} {
|
||||
// convert to string once
|
||||
let i = i.to_string();
|
||||
let is = i.as_string();
|
||||
if preload.contains_torrent(&i)? {
|
||||
continue;
|
||||
}
|
||||
log::debug!("Index `{i}`...");
|
||||
log::debug!("Index `{is}`...");
|
||||
// run the crawler in single thread for performance reasons,
|
||||
// use `timeout` argument option to skip the dead connections.
|
||||
match time::timeout(
|
||||
Duration::from_secs(config.add_torrent_timeout),
|
||||
session.add_torrent(
|
||||
AddTorrent::from_url(magnet(
|
||||
&i,
|
||||
&is,
|
||||
if config.tracker.is_empty() {
|
||||
None
|
||||
} else {
|
||||
|
|
@ -143,20 +143,20 @@ async fn main() -> Result<()> {
|
|||
.is_some_and(|limit| only_files.len() + 1 > limit)
|
||||
{
|
||||
log::debug!(
|
||||
"file count limit reached, skip `{id}` for `{i}`"
|
||||
"file count limit reached, skip `{id}` for `{is}`"
|
||||
);
|
||||
break;
|
||||
}
|
||||
if preload.max_filesize.is_some_and(|limit| info.len > limit) {
|
||||
log::debug!(
|
||||
"file size limit reached, skip `{id}` for `{i}`"
|
||||
"file size limit reached, skip `{id}` for `{is}`"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if preload.regex.as_ref().is_some_and(|r| {
|
||||
!r.is_match(&info.relative_filename.to_string_lossy())
|
||||
}) {
|
||||
log::debug!("regex filter, skip `{id}` for `{i}`");
|
||||
log::debug!("regex filter, skip `{id}` for `{is}`");
|
||||
continue;
|
||||
}
|
||||
assert!(keep_files.insert(info.relative_filename.clone()));
|
||||
|
|
@ -175,12 +175,12 @@ async fn main() -> Result<()> {
|
|||
session
|
||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||
.await?;
|
||||
log::debug!("torrent `{i}` indexed.")
|
||||
log::debug!("torrent `{is}` indexed.")
|
||||
}
|
||||
Ok(_) => panic!(),
|
||||
Err(e) => log::debug!("Failed to resolve `{i}`: `{e}`."),
|
||||
Err(e) => log::debug!("Failed to resolve `{is}`: `{e}`."),
|
||||
},
|
||||
Err(e) => log::debug!("failed to resolve `{i}`: `{e}`"),
|
||||
Err(e) => log::debug!("failed to resolve `{is}`: `{e}`"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -200,11 +200,7 @@ async fn main() -> Result<()> {
|
|||
|
||||
/// Build magnet URI
|
||||
fn magnet(info_hash: &str, trackers: Option<&Vec<Url>>) -> String {
|
||||
let mut m = if info_hash.len() == 40 {
|
||||
format!("magnet:?xt=urn:btih:{info_hash}")
|
||||
} else {
|
||||
todo!("infohash v2 is not supported by librqbit")
|
||||
};
|
||||
let mut m = format!("magnet:?xt=urn:btih:{info_hash}");
|
||||
if let Some(t) = trackers {
|
||||
for tracker in t {
|
||||
m.push_str("&tr=");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{Result, bail};
|
||||
use librqbit::dht::Id20;
|
||||
use regex::Regex;
|
||||
use std::{collections::HashSet, fs, path::PathBuf};
|
||||
|
||||
|
|
@ -36,14 +37,14 @@ impl Preload {
|
|||
/// cleanup tmp data on success (see rqbit#408)
|
||||
pub fn commit(
|
||||
&self,
|
||||
info_hash: &str,
|
||||
info_hash: &Id20,
|
||||
torrent_bytes: Vec<u8>,
|
||||
persist_files: Option<HashSet<PathBuf>>,
|
||||
) -> Result<()> {
|
||||
validate_info_hash(info_hash)?;
|
||||
let i = info_hash.as_string();
|
||||
// persist preload files
|
||||
let mut d = PathBuf::from(&self.root);
|
||||
d.push(info_hash);
|
||||
d.push(&i);
|
||||
if d.exists() {
|
||||
// clean previous data
|
||||
fs::remove_dir_all(&d)?;
|
||||
|
|
@ -87,7 +88,7 @@ impl Preload {
|
|||
log::debug!("clean tmp data `{}`", tmp.to_string_lossy())
|
||||
}
|
||||
// persist torrent bytes to file (on previous operations success)
|
||||
let t = self.torrent(info_hash);
|
||||
let t = self.torrent(i);
|
||||
fs::write(&t, torrent_bytes)?;
|
||||
log::debug!("persist torrent bytes for `{}`", t.to_string_lossy());
|
||||
Ok(())
|
||||
|
|
@ -97,10 +98,9 @@ impl Preload {
|
|||
|
||||
/// Get absolute path to the temporary directory
|
||||
/// * optionally creates the directory if it does not exist
|
||||
pub fn tmp(&self, info_hash: &str, is_create: bool) -> Result<PathBuf> {
|
||||
validate_info_hash(info_hash)?;
|
||||
pub fn tmp(&self, info_hash: &Id20, is_create: bool) -> Result<PathBuf> {
|
||||
let mut p = PathBuf::from(&self.root);
|
||||
p.push(tmp_component(info_hash));
|
||||
p.push(tmp_component(info_hash.as_string()));
|
||||
if p.is_file() {
|
||||
bail!("Output directory `{}` is file", p.to_string_lossy())
|
||||
}
|
||||
|
|
@ -117,30 +117,19 @@ impl Preload {
|
|||
}
|
||||
|
||||
/// Check whether a resolved torrent file exists for the given hash
|
||||
pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
|
||||
validate_info_hash(info_hash)?;
|
||||
Ok(fs::exists(self.torrent(info_hash))?)
|
||||
pub fn contains_torrent(&self, info_hash: &Id20) -> Result<bool> {
|
||||
Ok(fs::exists(self.torrent(info_hash.as_string()))?)
|
||||
}
|
||||
|
||||
/// Get absolute path to the torrent file
|
||||
fn torrent(&self, info_hash: &str) -> PathBuf {
|
||||
fn torrent(&self, info_hash: String) -> PathBuf {
|
||||
let mut p = PathBuf::from(&self.root);
|
||||
p.push(format!("{info_hash}.torrent"));
|
||||
p
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-expensive method to make sure the given string is safe to use in path builders
|
||||
/// @TODO implement custom type?
|
||||
fn validate_info_hash(value: &str) -> Result<()> {
|
||||
if value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("Invalid info-hash value `{value}`")
|
||||
}
|
||||
}
|
||||
|
||||
/// Build constant path component
|
||||
fn tmp_component(info_hash: &str) -> String {
|
||||
fn tmp_component(info_hash: String) -> String {
|
||||
format!(".{info_hash}")
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue