mirror of
https://github.com/YGGverse/aquatic-crawler.git
synced 2026-03-31 17:15:35 +00:00
update comments
This commit is contained in:
parent
ce36c5dd87
commit
41b386717d
1 changed file with 11 additions and 4 deletions
15
src/main.rs
15
src/main.rs
|
|
@@ -43,23 +43,24 @@ async fn main() -> Result<()> {
|
||||||
// begin
|
// begin
|
||||||
debug.info("Crawler started");
|
debug.info("Crawler started");
|
||||||
|
|
||||||
// collect processed info hashes to skip on the next cycles
|
// collect processed info hashes to skip on the next iterations (for this session)
|
||||||
let mut index = HashSet::with_capacity(arg.index_capacity);
|
let mut index = HashSet::with_capacity(arg.index_capacity);
|
||||||
loop {
|
loop {
|
||||||
debug.info("Index queue begin...");
|
debug.info("Index queue begin...");
|
||||||
// collect info-hashes from each API channel
|
// collect info-hashes from each API channel
|
||||||
for source in &arg.infohash_file {
|
for source in &arg.infohash_file {
|
||||||
debug.info(&format!("Index source `{source}`..."));
|
debug.info(&format!("Index source `{source}`..."));
|
||||||
// aquatic server may update the stats at this moment,
|
// aquatic server may update the stats at this moment, handle result manually
|
||||||
// handle this state manually
|
|
||||||
match api::infohashes(source) {
|
match api::infohashes(source) {
|
||||||
Ok(infohashes) => {
|
Ok(infohashes) => {
|
||||||
for i in infohashes {
|
for i in infohashes {
|
||||||
|
// is already indexed?
|
||||||
if index.contains(&i) {
|
if index.contains(&i) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
debug.info(&format!("Index `{i}`..."));
|
debug.info(&format!("Index `{i}`..."));
|
||||||
// run the crawler in single thread, use timeout to skip dead connections
|
// run the crawler in single thread for performance reasons,
|
||||||
|
// use `timeout` argument option to skip the dead connections.
|
||||||
match time::timeout(
|
match time::timeout(
|
||||||
timeout,
|
timeout,
|
||||||
session.add_torrent(
|
session.add_torrent(
|
||||||
|
|
@@ -86,6 +87,7 @@ async fn main() -> Result<()> {
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(r) => match r {
|
Ok(r) => match r {
|
||||||
|
// on `preload_regex` case only
|
||||||
Ok(AddTorrentResponse::Added(id, mt)) => {
|
Ok(AddTorrentResponse::Added(id, mt)) => {
|
||||||
if arg.save_torrents {
|
if arg.save_torrents {
|
||||||
mt.with_metadata(|m| {
|
mt.with_metadata(|m| {
|
||||||
|
|
@@ -99,17 +101,20 @@ async fn main() -> Result<()> {
|
||||||
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
// use `r.info` for Memory, SQLite, Manticore and other alternative storage type
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
// wait for the `preload_regex` files to finish downloading before continuing
|
||||||
match time::timeout(timeout, mt.wait_until_completed()).await {
|
match time::timeout(timeout, mt.wait_until_completed()).await {
|
||||||
Ok(r) => {
|
Ok(r) => {
|
||||||
if let Err(e) = r {
|
if let Err(e) = r {
|
||||||
debug.info(&format!("Skip `{i}`: `{e}`."))
|
debug.info(&format!("Skip `{i}`: `{e}`."))
|
||||||
} else {
|
} else {
|
||||||
|
// remove torrent from session as indexed
|
||||||
session
|
session
|
||||||
.delete(
|
.delete(
|
||||||
librqbit::api::TorrentIdOrHash::Id(id),
|
librqbit::api::TorrentIdOrHash::Id(id),
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
// ignore on the next crawl iterations for this session
|
||||||
index.insert(mt.info_hash().as_string());
|
index.insert(mt.info_hash().as_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -123,6 +128,8 @@ async fn main() -> Result<()> {
|
||||||
// @TODO
|
// @TODO
|
||||||
// use `r.info` for Memory, SQLite,
|
// use `r.info` for Memory, SQLite,
|
||||||
// Manticore and other alternative storage type
|
// Manticore and other alternative storage type
|
||||||
|
|
||||||
|
// ignore on the next crawl iterations for this session
|
||||||
index.insert(r.info_hash.as_string());
|
index.insert(r.info_hash.as_string());
|
||||||
}
|
}
|
||||||
// unexpected as should be deleted
|
// unexpected as should be deleted
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue