mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
use librqbit Id20 impl to validate info-hash bytes, require valid type for public methods
This commit is contained in:
parent
d61af4b970
commit
704a2e5c29
4 changed files with 24 additions and 55 deletions
|
|
@ -1,11 +1,10 @@
|
||||||
mod info_hash;
|
use librqbit::dht::Id20;
|
||||||
use info_hash::InfoHash;
|
|
||||||
|
|
||||||
/// Parse infohash from the source filepath,
|
/// Parse infohash from the source filepath,
|
||||||
/// decode hash bytes to `InfoHash` array on success.
|
/// decode hash bytes to `InfoHash` array on success.
|
||||||
///
|
///
|
||||||
/// * return `None` if the `path` is not reachable
|
/// * return `None` if the `path` is not reachable
|
||||||
pub fn get(path: &str, capacity: usize) -> Option<Vec<InfoHash>> {
|
pub fn get(path: &str, capacity: usize) -> Option<Vec<Id20>> {
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
if !path.ends_with(".bin") {
|
if !path.ends_with(".bin") {
|
||||||
todo!("Only sources in the `.bin` format are supported!")
|
todo!("Only sources in the `.bin` format are supported!")
|
||||||
|
|
@ -21,7 +20,7 @@ pub fn get(path: &str, capacity: usize) -> Option<Vec<InfoHash>> {
|
||||||
if f.read(&mut b).ok()? != L {
|
if f.read(&mut b).ok()? != L {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
r.push(InfoHash::V1(b))
|
r.push(Id20::from_bytes(&b).ok()?)
|
||||||
}
|
}
|
||||||
Some(r)
|
Some(r)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
pub enum InfoHash {
|
|
||||||
V1([u8; 20]),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for InfoHash {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::V1(i) => write!(
|
|
||||||
f,
|
|
||||||
"{}",
|
|
||||||
i.iter().map(|b| format!("{b:02x}")).collect::<String>()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
24
src/main.rs
24
src/main.rs
|
|
@ -89,18 +89,18 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
} {
|
} {
|
||||||
// convert to string once
|
// convert to string once
|
||||||
let i = i.to_string();
|
let is = i.as_string();
|
||||||
if preload.contains_torrent(&i)? {
|
if preload.contains_torrent(&i)? {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
log::debug!("Index `{i}`...");
|
log::debug!("Index `{is}`...");
|
||||||
// run the crawler in single thread for performance reasons,
|
// run the crawler in single thread for performance reasons,
|
||||||
// use `timeout` argument option to skip the dead connections.
|
// use `timeout` argument option to skip the dead connections.
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
Duration::from_secs(config.add_torrent_timeout),
|
Duration::from_secs(config.add_torrent_timeout),
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
AddTorrent::from_url(magnet(
|
AddTorrent::from_url(magnet(
|
||||||
&i,
|
&is,
|
||||||
if config.tracker.is_empty() {
|
if config.tracker.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -143,20 +143,20 @@ async fn main() -> Result<()> {
|
||||||
.is_some_and(|limit| only_files.len() + 1 > limit)
|
.is_some_and(|limit| only_files.len() + 1 > limit)
|
||||||
{
|
{
|
||||||
log::debug!(
|
log::debug!(
|
||||||
"file count limit reached, skip `{id}` for `{i}`"
|
"file count limit reached, skip `{id}` for `{is}`"
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if preload.max_filesize.is_some_and(|limit| info.len > limit) {
|
if preload.max_filesize.is_some_and(|limit| info.len > limit) {
|
||||||
log::debug!(
|
log::debug!(
|
||||||
"file size limit reached, skip `{id}` for `{i}`"
|
"file size limit reached, skip `{id}` for `{is}`"
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if preload.regex.as_ref().is_some_and(|r| {
|
if preload.regex.as_ref().is_some_and(|r| {
|
||||||
!r.is_match(&info.relative_filename.to_string_lossy())
|
!r.is_match(&info.relative_filename.to_string_lossy())
|
||||||
}) {
|
}) {
|
||||||
log::debug!("regex filter, skip `{id}` for `{i}`");
|
log::debug!("regex filter, skip `{id}` for `{is}`");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert!(keep_files.insert(info.relative_filename.clone()));
|
assert!(keep_files.insert(info.relative_filename.clone()));
|
||||||
|
|
@ -175,12 +175,12 @@ async fn main() -> Result<()> {
|
||||||
session
|
session
|
||||||
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
.delete(librqbit::api::TorrentIdOrHash::Id(id), false)
|
||||||
.await?;
|
.await?;
|
||||||
log::debug!("torrent `{i}` indexed.")
|
log::debug!("torrent `{is}` indexed.")
|
||||||
}
|
}
|
||||||
Ok(_) => panic!(),
|
Ok(_) => panic!(),
|
||||||
Err(e) => log::debug!("Failed to resolve `{i}`: `{e}`."),
|
Err(e) => log::debug!("Failed to resolve `{is}`: `{e}`."),
|
||||||
},
|
},
|
||||||
Err(e) => log::debug!("failed to resolve `{i}`: `{e}`"),
|
Err(e) => log::debug!("failed to resolve `{is}`: `{e}`"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -200,11 +200,7 @@ async fn main() -> Result<()> {
|
||||||
|
|
||||||
/// Build magnet URI
|
/// Build magnet URI
|
||||||
fn magnet(info_hash: &str, trackers: Option<&Vec<Url>>) -> String {
|
fn magnet(info_hash: &str, trackers: Option<&Vec<Url>>) -> String {
|
||||||
let mut m = if info_hash.len() == 40 {
|
let mut m = format!("magnet:?xt=urn:btih:{info_hash}");
|
||||||
format!("magnet:?xt=urn:btih:{info_hash}")
|
|
||||||
} else {
|
|
||||||
todo!("infohash v2 is not supported by librqbit")
|
|
||||||
};
|
|
||||||
if let Some(t) = trackers {
|
if let Some(t) = trackers {
|
||||||
for tracker in t {
|
for tracker in t {
|
||||||
m.push_str("&tr=");
|
m.push_str("&tr=");
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
|
use librqbit::dht::Id20;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::{collections::HashSet, fs, path::PathBuf};
|
use std::{collections::HashSet, fs, path::PathBuf};
|
||||||
|
|
||||||
|
|
@ -36,14 +37,14 @@ impl Preload {
|
||||||
/// cleanup tmp data on success (see rqbit#408)
|
/// cleanup tmp data on success (see rqbit#408)
|
||||||
pub fn commit(
|
pub fn commit(
|
||||||
&self,
|
&self,
|
||||||
info_hash: &str,
|
info_hash: &Id20,
|
||||||
torrent_bytes: Vec<u8>,
|
torrent_bytes: Vec<u8>,
|
||||||
persist_files: Option<HashSet<PathBuf>>,
|
persist_files: Option<HashSet<PathBuf>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
validate_info_hash(info_hash)?;
|
let i = info_hash.as_string();
|
||||||
// persist preload files
|
// persist preload files
|
||||||
let mut d = PathBuf::from(&self.root);
|
let mut d = PathBuf::from(&self.root);
|
||||||
d.push(info_hash);
|
d.push(&i);
|
||||||
if d.exists() {
|
if d.exists() {
|
||||||
// clean previous data
|
// clean previous data
|
||||||
fs::remove_dir_all(&d)?;
|
fs::remove_dir_all(&d)?;
|
||||||
|
|
@ -87,7 +88,7 @@ impl Preload {
|
||||||
log::debug!("clean tmp data `{}`", tmp.to_string_lossy())
|
log::debug!("clean tmp data `{}`", tmp.to_string_lossy())
|
||||||
}
|
}
|
||||||
// persist torrent bytes to file (on previous operations success)
|
// persist torrent bytes to file (on previous operations success)
|
||||||
let t = self.torrent(info_hash);
|
let t = self.torrent(i);
|
||||||
fs::write(&t, torrent_bytes)?;
|
fs::write(&t, torrent_bytes)?;
|
||||||
log::debug!("persist torrent bytes for `{}`", t.to_string_lossy());
|
log::debug!("persist torrent bytes for `{}`", t.to_string_lossy());
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
@ -97,10 +98,9 @@ impl Preload {
|
||||||
|
|
||||||
/// Get absolute path to the temporary directory
|
/// Get absolute path to the temporary directory
|
||||||
/// * optionally creates directory if not exists
|
/// * optionally creates directory if not exists
|
||||||
pub fn tmp(&self, info_hash: &str, is_create: bool) -> Result<PathBuf> {
|
pub fn tmp(&self, info_hash: &Id20, is_create: bool) -> Result<PathBuf> {
|
||||||
validate_info_hash(info_hash)?;
|
|
||||||
let mut p = PathBuf::from(&self.root);
|
let mut p = PathBuf::from(&self.root);
|
||||||
p.push(tmp_component(info_hash));
|
p.push(tmp_component(info_hash.as_string()));
|
||||||
if p.is_file() {
|
if p.is_file() {
|
||||||
bail!("Output directory `{}` is file", p.to_string_lossy())
|
bail!("Output directory `{}` is file", p.to_string_lossy())
|
||||||
}
|
}
|
||||||
|
|
@ -117,30 +117,19 @@ impl Preload {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check the given hash is contain resolved torrent file
|
/// Check the given hash is contain resolved torrent file
|
||||||
pub fn contains_torrent(&self, info_hash: &str) -> Result<bool> {
|
pub fn contains_torrent(&self, info_hash: &Id20) -> Result<bool> {
|
||||||
validate_info_hash(info_hash)?;
|
Ok(fs::exists(self.torrent(info_hash.as_string()))?)
|
||||||
Ok(fs::exists(self.torrent(info_hash))?)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get absolute path to the torrent file
|
/// Get absolute path to the torrent file
|
||||||
fn torrent(&self, info_hash: &str) -> PathBuf {
|
fn torrent(&self, info_hash: String) -> PathBuf {
|
||||||
let mut p = PathBuf::from(&self.root);
|
let mut p = PathBuf::from(&self.root);
|
||||||
p.push(format!("{info_hash}.torrent"));
|
p.push(format!("{info_hash}.torrent"));
|
||||||
p
|
p
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Non-expensive method to make sure the given string is safe to use in path builders
|
|
||||||
/// @TODO implement custom type?
|
|
||||||
fn validate_info_hash(value: &str) -> Result<()> {
|
|
||||||
if value.len() == 40 && value.chars().all(|c| c.is_ascii_hexdigit()) {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
bail!("Invalid info-hash value `{value}`")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build constant path component
|
/// Build constant path component
|
||||||
fn tmp_component(info_hash: &str) -> String {
|
fn tmp_component(info_hash: String) -> String {
|
||||||
format!(".{info_hash}")
|
format!(".{info_hash}")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue