mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 09:05:33 +00:00
implement torrent files index option
This commit is contained in:
parent
d0d469ee78
commit
29ff0b52cc
5 changed files with 95 additions and 25 deletions
|
|
@ -67,7 +67,7 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
|||
--infohash <INFOHASH>
|
||||
Absolute path(s) or URL(s) to import infohashes from the Aquatic tracker binary API
|
||||
|
||||
* PR#233 feature
|
||||
* PR#233 feature ([Wiki](https://github.com/YGGverse/aquatic-crawler/wiki/Aquatic))
|
||||
|
||||
--tracker <TRACKER>
|
||||
Define custom tracker(s) to preload the `.torrent` files info
|
||||
|
|
@ -149,6 +149,9 @@ aquatic-crawler --infohash /path/to/info-hash-ipv4.bin\
|
|||
|
||||
[default: 1000]
|
||||
|
||||
--index-list
|
||||
Index torrent files
|
||||
|
||||
--index-timeout <INDEX_TIMEOUT>
|
||||
Remove records from index older than `seconds`
|
||||
|
||||
|
|
|
|||
|
|
@ -117,6 +117,10 @@ pub struct Config {
|
|||
#[arg(long, default_value_t = 1000)]
|
||||
pub index_capacity: usize,
|
||||
|
||||
/// Index torrent files
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub index_list: bool,
|
||||
|
||||
/// Remove records from index older than `seconds`
|
||||
#[arg(long)]
|
||||
pub index_timeout: Option<i64>,
|
||||
|
|
|
|||
27
src/index.rs
27
src/index.rs
|
|
@ -16,15 +16,23 @@ pub struct Index {
|
|||
/// Store the index value in memory only when it is in use by the init options
|
||||
has_name: bool,
|
||||
has_size: bool,
|
||||
has_list: bool,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn init(capacity: usize, timeout: Option<i64>, has_name: bool, has_size: bool) -> Self {
|
||||
pub fn init(
|
||||
capacity: usize,
|
||||
timeout: Option<i64>,
|
||||
has_name: bool,
|
||||
has_size: bool,
|
||||
has_list: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
index: HashMap::with_capacity(capacity),
|
||||
timeout: timeout.map(Duration::seconds),
|
||||
has_size,
|
||||
has_name,
|
||||
has_list,
|
||||
is_changed: false,
|
||||
}
|
||||
}
|
||||
|
|
@ -49,7 +57,15 @@ impl Index {
|
|||
self.index.values().map(|i| i.node).sum::<u64>()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, infohash: String, node: u64, size: u64, name: Option<String>) {
|
||||
pub fn insert(
|
||||
&mut self,
|
||||
infohash: String,
|
||||
node: u64,
|
||||
size: u64,
|
||||
list: Option<Vec<(String, u64)>>,
|
||||
name: Option<String>,
|
||||
) {
|
||||
println!("{:?}", &list);
|
||||
if self
|
||||
.index
|
||||
.insert(
|
||||
|
|
@ -58,6 +74,7 @@ impl Index {
|
|||
node,
|
||||
if self.has_size { Some(size) } else { None },
|
||||
if self.has_name { name } else { None },
|
||||
if self.has_list { list } else { None },
|
||||
),
|
||||
)
|
||||
.is_none()
|
||||
|
|
@ -80,11 +97,11 @@ fn test() {
|
|||
use std::{thread::sleep, time::Duration};
|
||||
|
||||
// test values auto-clean by timeout
|
||||
let mut i = Index::init(2, Some(3), false, false);
|
||||
let mut i = Index::init(2, Some(3), false, false, false);
|
||||
|
||||
i.insert("h1".to_string(), 0, 0, None);
|
||||
i.insert("h1".to_string(), 0, 0, None, None);
|
||||
sleep(Duration::from_secs(1));
|
||||
i.insert("h2".to_string(), 0, 0, None);
|
||||
i.insert("h2".to_string(), 0, 0, None, None);
|
||||
|
||||
i.refresh();
|
||||
assert_eq!(i.len(), 2);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
use chrono::{DateTime, Utc};
|
||||
|
||||
const NAME_MAX_LEN: usize = 125; // + 3 bytes for `...` offset @TODO optional
|
||||
|
||||
/// The `Index` value
|
||||
pub struct Value {
|
||||
pub time: DateTime<Utc>,
|
||||
|
|
@ -9,15 +7,22 @@ pub struct Value {
|
|||
// Isolate by applying internal filter on value set
|
||||
size: Option<u64>,
|
||||
name: Option<String>,
|
||||
list: Option<Vec<(String, u64)>>,
|
||||
}
|
||||
|
||||
impl Value {
|
||||
/// Create new `Self` with current timestamp
|
||||
pub fn new(node: u64, size: Option<u64>, name: Option<String>) -> Self {
|
||||
pub fn new(
|
||||
node: u64,
|
||||
size: Option<u64>,
|
||||
name: Option<String>,
|
||||
list: Option<Vec<(String, u64)>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
time: Utc::now(),
|
||||
node,
|
||||
size,
|
||||
list: filter_list(list),
|
||||
name: filter_name(name),
|
||||
}
|
||||
}
|
||||
|
|
@ -25,23 +30,38 @@ impl Value {
|
|||
pub fn name(&self) -> Option<&String> {
|
||||
self.name.as_ref()
|
||||
}
|
||||
/// Get reference to the safely constructed files `list` member
|
||||
pub fn list(&self) -> Option<&Vec<(String, u64)>> {
|
||||
self.list.as_ref()
|
||||
}
|
||||
/// Get reference to the safely constructed `length` member
|
||||
pub fn size(&self) -> Option<u64> {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
/// Prevent unexpected memory usage on store values from unknown source
|
||||
fn filter_name(value: Option<String>) -> Option<String> {
|
||||
value.map(|v| {
|
||||
if v.len() > NAME_MAX_LEN {
|
||||
format!("{}...", sanitize(&v[..NAME_MAX_LEN]))
|
||||
} else {
|
||||
v
|
||||
}
|
||||
value.map(crop)
|
||||
}
|
||||
|
||||
fn filter_list(value: Option<Vec<(String, u64)>>) -> Option<Vec<(String, u64)>> {
|
||||
value.map(|f| {
|
||||
f.into_iter()
|
||||
.map(|(n, l)| (crop(sanitize(&n)), l))
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Crop long values (prevents unexpected memory pool usage)
|
||||
fn crop(value: String) -> String {
|
||||
const L: usize = 125; // + 3 bytes for `...` offset, 128 max @TODO optional
|
||||
if value.len() > L {
|
||||
format!("{}...", sanitize(&value[..L]))
|
||||
} else {
|
||||
value
|
||||
}
|
||||
}
|
||||
|
||||
/// Strip tags & bom chars from string
|
||||
fn sanitize(value: &str) -> String {
|
||||
use voca_rs::strip::*;
|
||||
|
|
|
|||
44
src/main.rs
44
src/main.rs
|
|
@ -14,6 +14,10 @@ use config::Config;
|
|||
use debug::Debug;
|
||||
use format::Format;
|
||||
use index::Index;
|
||||
use librqbit::{
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ByteBufOwned, ConnectionOptions,
|
||||
PeerConnectionOptions, SessionOptions, TorrentMetaV1Info,
|
||||
};
|
||||
use peers::Peers;
|
||||
use rss::Rss;
|
||||
use std::{collections::HashSet, num::NonZero, path::PathBuf, time::Duration};
|
||||
|
|
@ -24,10 +28,6 @@ use url::Url;
|
|||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
use clap::Parser;
|
||||
use librqbit::{
|
||||
AddTorrent, AddTorrentOptions, AddTorrentResponse, ConnectionOptions,
|
||||
PeerConnectionOptions, SessionOptions,
|
||||
};
|
||||
use tokio::time;
|
||||
|
||||
// init components
|
||||
|
|
@ -80,6 +80,7 @@ async fn main() -> Result<()> {
|
|||
config.index_timeout,
|
||||
config.export_rss.is_some(),
|
||||
config.export_rss.is_some(),
|
||||
config.export_rss.is_some() && config.index_list,
|
||||
);
|
||||
loop {
|
||||
debug.info("Index queue begin...");
|
||||
|
|
@ -150,7 +151,7 @@ async fn main() -> Result<()> {
|
|||
config.preload_max_filecount.unwrap_or_default(),
|
||||
);
|
||||
mt.wait_until_initialized().await?;
|
||||
let (name, size) = mt.with_metadata(|m| {
|
||||
let (name, size, list) = mt.with_metadata(|m| {
|
||||
// init preload files list
|
||||
if let Some(ref p) = preload {
|
||||
for (id, info) in m.file_infos.iter().enumerate() {
|
||||
|
|
@ -184,7 +185,11 @@ async fn main() -> Result<()> {
|
|||
save_torrent_file(t, &debug, &i, &m.torrent_bytes)
|
||||
}
|
||||
|
||||
(m.info.name.as_ref().map(|n| n.to_string()), size(&m.info))
|
||||
(
|
||||
m.info.name.as_ref().map(|n| n.to_string()),
|
||||
size(&m.info),
|
||||
list(&m.info),
|
||||
)
|
||||
})?;
|
||||
session.update_only_files(&mt, &only_files).await?;
|
||||
session.unpause(&mt).await?;
|
||||
|
|
@ -199,7 +204,7 @@ async fn main() -> Result<()> {
|
|||
p.cleanup(&i, Some(only_files_keep))?
|
||||
}
|
||||
|
||||
index.insert(i, only_files_size, size, name)
|
||||
index.insert(i, only_files_size, size, list, name)
|
||||
}
|
||||
Ok(AddTorrentResponse::ListOnly(r)) => {
|
||||
if let Some(ref t) = torrent {
|
||||
|
|
@ -210,7 +215,13 @@ async fn main() -> Result<()> {
|
|||
// use `r.info` for Memory, SQLite,
|
||||
// Manticore and other alternative storage type
|
||||
|
||||
index.insert(i, 0, size(&r.info), r.info.name.map(|n| n.to_string()))
|
||||
index.insert(
|
||||
i,
|
||||
0,
|
||||
size(&r.info),
|
||||
list(&r.info),
|
||||
r.info.name.map(|n| n.to_string()),
|
||||
)
|
||||
}
|
||||
// unexpected as should be deleted
|
||||
Ok(AddTorrentResponse::AlreadyManaged(..)) => panic!(),
|
||||
|
|
@ -288,7 +299,7 @@ fn magnet(infohash: &str, trackers: Option<&HashSet<Url>>) -> String {
|
|||
}
|
||||
|
||||
/// Count total size, including torrent files
|
||||
fn size(info: &librqbit::TorrentMetaV1Info<librqbit::ByteBufOwned>) -> u64 {
|
||||
fn size(info: &TorrentMetaV1Info<ByteBufOwned>) -> u64 {
|
||||
let mut t = 0;
|
||||
if let Some(l) = info.length {
|
||||
t += l
|
||||
|
|
@ -300,3 +311,18 @@ fn size(info: &librqbit::TorrentMetaV1Info<librqbit::ByteBufOwned>) -> u64 {
|
|||
}
|
||||
t
|
||||
}
|
||||
|
||||
fn list(info: &TorrentMetaV1Info<ByteBufOwned>) -> Option<Vec<(String, u64)>> {
|
||||
info.files.as_ref().map(|files| {
|
||||
files
|
||||
.iter()
|
||||
.map(|f| {
|
||||
(
|
||||
String::from_utf8(f.path.iter().flat_map(|b| b.to_vec()).collect())
|
||||
.unwrap_or_default(),
|
||||
f.length,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue