mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
implement torrent files index option
This commit is contained in:
parent
d0d469ee78
commit
29ff0b52cc
5 changed files with 95 additions and 25 deletions
|
|
@ -67,7 +67,7 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
||||||
--infohash <INFOHASH>
|
--infohash <INFOHASH>
|
||||||
Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
||||||
|
|
||||||
* PR#233 feature
|
* PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||||
|
|
||||||
--tracker <TRACKER>
|
--tracker <TRACKER>
|
||||||
Define custom tracker(s) to preload the `.torrent` files info
|
Define custom tracker(s) to preload the `.torrent` files info
|
||||||
|
|
@ -149,6 +149,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
||||||
|
|
||||||
[default: 1000]
|
[default: 1000]
|
||||||
|
|
||||||
|
--index-list
|
||||||
|
Index torrent files
|
||||||
|
|
||||||
--index-timeout <INDEX_TIMEOUT>
|
--index-timeout <INDEX_TIMEOUT>
|
||||||
Remove records from index older than `seconds`
|
Remove records from index older than `seconds`
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,10 @@ pub struct Config {
|
||||||
#[arg(long, default_value_t = 1000)]
|
#[arg(long, default_value_t = 1000)]
|
||||||
pub index_capacity: usize,
|
pub index_capacity: usize,
|
||||||
|
|
||||||
|
/// Index torrent files
|
||||||
|
#[arg(long, default_value_t = false)]
|
||||||
|
pub index_list: bool,
|
||||||
|
|
||||||
/// Remove records from index older than `seconds`
|
/// Remove records from index older than `seconds`
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub index_timeout: Option<i64>,
|
pub index_timeout: Option<i64>,
|
||||||
|
|
|
||||||
27
src/index.rs
27
src/index.rs
|
|
@ -16,15 +16,23 @@ pub struct Index {
|
||||||
/// Store the index value in memory only when it is in use by the init options
|
/// Store the index value in memory only when it is in use by the init options
|
||||||
has_name: bool,
|
has_name: bool,
|
||||||
has_size: bool,
|
has_size: bool,
|
||||||
|
has_list: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
pub fn init(capacity: usize, timeout: Option<i64>, has_name: bool, has_size: bool) -> Self {
|
pub fn init(
|
||||||
|
capacity: usize,
|
||||||
|
timeout: Option<i64>,
|
||||||
|
has_name: bool,
|
||||||
|
has_size: bool,
|
||||||
|
has_list: bool,
|
||||||
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
index: HashMap::with_capacity(capacity),
|
index: HashMap::with_capacity(capacity),
|
||||||
timeout: timeout.map(Duration::seconds),
|
timeout: timeout.map(Duration::seconds),
|
||||||
has_size,
|
has_size,
|
||||||
has_name,
|
has_name,
|
||||||
|
has_list,
|
||||||
is_changed: false,
|
is_changed: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -49,7 +57,15 @@ impl Index {
|
||||||
self.index.values().map(|i| i.node).sum::<u64>()
|
self.index.values().map(|i| i.node).sum::<u64>()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, infohash: String, node: u64, size: u64, name: Option<String>) {
|
pub fn insert(
|
||||||
|
&mut self,
|
||||||
|
infohash: String,
|
||||||
|
node: u64,
|
||||||
|
size: u64,
|
||||||
|
list: Option<Vec<(String, u64)>>,
|
||||||
|
name: Option<String>,
|
||||||
|
) {
|
||||||
|
println!("{:?}", &list);
|
||||||
if self
|
if self
|
||||||
.index
|
.index
|
||||||
.insert(
|
.insert(
|
||||||
|
|
@ -58,6 +74,7 @@ impl Index {
|
||||||
node,
|
node,
|
||||||
if self.has_size { Some(size) } else { None },
|
if self.has_size { Some(size) } else { None },
|
||||||
if self.has_name { name } else { None },
|
if self.has_name { name } else { None },
|
||||||
|
if self.has_list { list } else { None },
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.is_none()
|
.is_none()
|
||||||
|
|
@ -80,11 +97,11 @@ fn test() {
|
||||||
use std::{thread::sleep, time::Duration};
|
use std::{thread::sleep, time::Duration};
|
||||||
|
|
||||||
// test values auto-clean by timeout
|
// test values auto-clean by timeout
|
||||||
let mut i = Index::init(2, Some(3), false, false);
|
let mut i = Index::init(2, Some(3), false, false, false);
|
||||||
|
|
||||||
i.insert("h1".to_string(), 0, 0, None);
|
i.insert("h1".to_string(), 0, 0, None, None);
|
||||||
sleep(Duration::from_secs(1));
|
sleep(Duration::from_secs(1));
|
||||||
i.insert("h2".to_string(), 0, 0, None);
|
i.insert("h2".to_string(), 0, 0, None, None);
|
||||||
|
|
||||||
i.refresh();
|
i.refresh();
|
||||||
assert_eq!(i.len(), 2);
|
assert_eq!(i.len(), 2);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
|
||||||
const NAME_MAX_LEN: usize = 125; // + 3 bytes for `...` offset @TODO optional
|
|
||||||
|
|
||||||
/// The `Index` value
|
/// The `Index` value
|
||||||
pub struct Value {
|
pub struct Value {
|
||||||
pub time: DateTime<Utc>,
|
pub time: DateTime<Utc>,
|
||||||
|
|
@ -9,15 +7,22 @@ pub struct Value {
|
||||||
// Isolate by applying internal filter on value set
|
// Isolate by applying internal filter on value set
|
||||||
size: Option<u64>,
|
size: Option<u64>,
|
||||||
name: Option<String>,
|
name: Option<String>,
|
||||||
|
list: Option<Vec<(String, u64)>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Value {
|
impl Value {
|
||||||
/// Create new `Self` with current timestamp
|
/// Create new `Self` with current timestamp
|
||||||
pub fn new(node: u64, size: Option<u64>, name: Option<String>) -> Self {
|
pub fn new(
|
||||||
|
node: u64,
|
||||||
|
size: Option<u64>,
|
||||||
|
name: Option<String>,
|
||||||
|
list: Option<Vec<(String, u64)>>,
|
||||||
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
time: Utc::now(),
|
time: Utc::now(),
|
||||||
node,
|
node,
|
||||||
size,
|
size,
|
||||||
|
list: filter_list(list),
|
||||||
name: filter_name(name),
|
name: filter_name(name),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -25,23 +30,38 @@ impl Value {
|
||||||
pub fn name(&self) -> Option<&String> {
|
pub fn name(&self) -> Option<&String> {
|
||||||
self.name.as_ref()
|
self.name.as_ref()
|
||||||
}
|
}
|
||||||
|
/// Get reference to the safely constructed files `list` member
|
||||||
|
pub fn list(&self) -> Option<&Vec<(String, u64)>> {
|
||||||
|
self.list.as_ref()
|
||||||
|
}
|
||||||
/// Get reference to the safely constructed `length` member
|
/// Get reference to the safely constructed `length` member
|
||||||
pub fn size(&self) -> Option<u64> {
|
pub fn size(&self) -> Option<u64> {
|
||||||
self.size
|
self.size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prevent unexpected memory usage on store values from unknown source
|
|
||||||
fn filter_name(value: Option<String>) -> Option<String> {
|
fn filter_name(value: Option<String>) -> Option<String> {
|
||||||
value.map(|v| {
|
value.map(crop)
|
||||||
if v.len() > NAME_MAX_LEN {
|
|
||||||
format!("{}...", sanitize(&v[..NAME_MAX_LEN]))
|
|
||||||
} else {
|
|
||||||
v
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn filter_list(value: Option<Vec<(String, u64)>>) -> Option<Vec<(String, u64)>> {
|
||||||
|
value.map(|f| {
|
||||||
|
f.into_iter()
|
||||||
|
.map(|(n, l)| (crop(sanitize(&n)), l))
|
||||||
|
.collect()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Crop long values (prevents unexpected memory pool usage)
|
||||||
|
fn crop(value: String) -> String {
|
||||||
|
const L: usize = 125; // + 3 bytes for `...` offset, 128 max @TODO optional
|
||||||
|
if value.len() > L {
|
||||||
|
format!("{}...", sanitize(&value[..L]))
|
||||||
|
} else {
|
||||||
|
value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Strip tags & bom chars from string
|
/// Strip tags & bom chars from string
|
||||||
fn sanitize(value: &str) -> String {
|
fn sanitize(value: &str) -> String {
|
||||||
use voca_rs::strip::*;
|
use voca_rs::strip::*;
|
||||||
|
|
|
||||||
44
src/main.rs
44
src/main.rs
|
|
@ -14,6 +14,10 @@ use config::Config;
|
||||||
use debug::Debug;
|
use debug::Debug;
|
||||||
use format::Format;
|
use format::Format;
|
||||||
use index::Index;
|
use index::Index;
|
||||||
|
use librqbit::{
|
||||||
|
AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions,
|
||||||
|
PeerConnectionOptions, SessionOptions, TorrentMetaV1Info,
|
||||||
|
};
|
||||||
use peers::Peers;
|
use peers::Peers;
|
||||||
use rss::Rss;
|
use rss::Rss;
|
||||||
use std::{collections::HashSet, num::NonZero, path::PathBuf, time::Duration};
|
use std::{collections::HashSet, num::NonZero, path::PathBuf, time::Duration};
|
||||||
|
|
@ -24,10 +28,6 @@ use url::Url;
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use librqbit::{
|
|
||||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions,
|
|
||||||
PeerConnectionOptions, SessionOptions,
|
|
||||||
};
|
|
||||||
use tokio::time;
|
use tokio::time;
|
||||||
|
|
||||||
// init components
|
// init components
|
||||||
|
|
@ -80,6 +80,7 @@ async fn main() -> Result<()> {
|
||||||
config.index_timeout,
|
config.index_timeout,
|
||||||
config.export_rss.is_some(),
|
config.export_rss.is_some(),
|
||||||
config.export_rss.is_some(),
|
config.export_rss.is_some(),
|
||||||
|
config.export_rss.is_some() && config.index_list,
|
||||||
);
|
);
|
||||||
loop {
|
loop {
|
||||||
debug.info("Index queue begin...");
|
debug.info("Index queue begin...");
|
||||||
|
|
@ -150,7 +151,7 @@ async fn main() -> Result<()> {
|
||||||
config.preload_max_filecount.unwrap_or_default(),
|
config.preload_max_filecount.unwrap_or_default(),
|
||||||
);
|
);
|
||||||
mt.wait_until_initialized().await?;
|
mt.wait_until_initialized().await?;
|
||||||
let (name, size) = mt.with_metadata(|m| {
|
let (name, size, list) = mt.with_metadata(|m| {
|
||||||
// init preload files list
|
// init preload files list
|
||||||
if let Some(ref p) = preload {
|
if let Some(ref p) = preload {
|
||||||
for (id, info) in m.file_infos.iter().enumerate() {
|
for (id, info) in m.file_infos.iter().enumerate() {
|
||||||
|
|
@ -184,7 +185,11 @@ async fn main() -> Result<()> {
|
||||||
save_torrent_file(t, &debug, &i, &m.torrent_bytes)
|
save_torrent_file(t, &debug, &i, &m.torrent_bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
(m.info.name.as_ref().map(|n| n.to_string()), size(&m.info))
|
(
|
||||||
|
m.info.name.as_ref().map(|n| n.to_string()),
|
||||||
|
size(&m.info),
|
||||||
|
list(&m.info),
|
||||||
|
)
|
||||||
})?;
|
})?;
|
||||||
session.update_only_files(&mt, &only_files).await?;
|
session.update_only_files(&mt, &only_files).await?;
|
||||||
session.unpause(&mt).await?;
|
session.unpause(&mt).await?;
|
||||||
|
|
@ -199,7 +204,7 @@ async fn main() -> Result<()> {
|
||||||
p.cleanup(&i, Some(only_files_keep))?
|
p.cleanup(&i, Some(only_files_keep))?
|
||||||
}
|
}
|
||||||
|
|
||||||
index.insert(i, only_files_size, size, name)
|
index.insert(i, only_files_size, size, list, name)
|
||||||
}
|
}
|
||||||
Ok(AddTorrentResponse::ListOnly(r)) => {
|
Ok(AddTorrentResponse::ListOnly(r)) => {
|
||||||
if let Some(ref t) = torrent {
|
if let Some(ref t) = torrent {
|
||||||
|
|
@ -210,7 +215,13 @@ async fn main() -> Result<()> {
|
||||||
// use `r.info` for Memory, SQLite,
|
// use `r.info` for Memory, SQLite,
|
||||||
// Manticore and other alternative storage type
|
// Manticore and other alternative storage type
|
||||||
|
|
||||||
index.insert(i, 0, size(&r.info), r.info.name.map(|n| n.to_string()))
|
index.insert(
|
||||||
|
i,
|
||||||
|
0,
|
||||||
|
size(&r.info),
|
||||||
|
list(&r.info),
|
||||||
|
r.info.name.map(|n| n.to_string()),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
// unexpected as should be deleted
|
// unexpected as should be deleted
|
||||||
Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
|
Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
|
||||||
|
|
@ -288,7 +299,7 @@ fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Count total size, including torrent files
|
/// Count total size, including torrent files
|
||||||
fn size(info: &librqbit::TorrentMetaV1Info<librqbit::ByteBufOwned>) -> u64 {
|
fn size(info: &TorrentMetaV1Info<ByteBufOwned>) -> u64 {
|
||||||
let mut t = 0;
|
let mut t = 0;
|
||||||
if let Some(l) = info.length {
|
if let Some(l) = info.length {
|
||||||
t += l
|
t += l
|
||||||
|
|
@ -300,3 +311,18 @@ fn size(info: &librqbit::TorrentMetaV1Info<librqbit::ByteBufOwned>) -> u64 {
|
||||||
}
|
}
|
||||||
t
|
t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn list(info: &TorrentMetaV1Info<ByteBufOwned>) -> Option<Vec<(String, u64)>> {
|
||||||
|
info.files.as_ref().map(|files| {
|
||||||
|
files
|
||||||
|
.iter()
|
||||||
|
.map(|f| {
|
||||||
|
(
|
||||||
|
String::from_utf8(f.path.iter().flat_map(|b| b.to_vec()).collect())
|
||||||
|
.unwrap_or_default(),
|
||||||
|
f.length,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue