mirror of
https://github.com/YGGverse/rssto.git
synced 2026-03-31 17:15:29 +00:00
implement initial llm features
This commit is contained in:
parent
5608e2e081
commit
de3fda435a
10 changed files with 269 additions and 14 deletions
|
|
@ -3,5 +3,6 @@ resolver = "2"
|
|||
members = [
|
||||
"crates/crawler",
|
||||
"crates/http",
|
||||
"crates/llm",
|
||||
"crates/mysql",
|
||||
]
|
||||
|
|
@ -11,7 +11,7 @@ Crawl content from RSS feeds into multiple formats
|
|||
|
||||
## Components
|
||||
|
||||
* [x] `rssto-crawler` - RSS feed reader and data scrapper daemon
|
||||
* [x] `rssto-http` - Web server implementation based on the Rocket engine
|
||||
* [x] `rssto-mysql` - Shared database library
|
||||
* [ ] `rssto-llama` - Feeds auto-translation tool
|
||||
* `rssto-crawler` - RSS feed reader and data scrapper daemon
|
||||
* `rssto-http` - Web server implementation based on the Rocket engine
|
||||
* `rssto-llm` - Feeds auto-translation
|
||||
* `rssto-mysql` - Shared database library
|
||||
|
|
@ -190,7 +190,7 @@ fn crawl(db: &Mysql, channel_config: &config::Channel) -> Result<()> {
|
|||
db.contents_by_channel_item_id_provider_id(channel_item_id, None, Some(1))?
|
||||
.is_empty()
|
||||
);
|
||||
let _content_id = db.insert_content(channel_item_id, None, title, description)?;
|
||||
let _content_id = db.insert_content(channel_item_id, None, &title, &description)?;
|
||||
// @TODO preload media
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ Web server implementation based on the Rocket engine
|
|||
|
||||
```
|
||||
cd rssto/crates/rssto-http
|
||||
cargo run -- --mysql-username USER \
|
||||
--mysql-password PASS \
|
||||
--mysql-database NAME
|
||||
```
|
||||
cargo run -- --mysql-username {USER} \
|
||||
--mysql-password {PASS} \
|
||||
--mysql-database {NAME}
|
||||
```
|
||||
* optionally, use `--provider-id {ID}` to filter content using post-processing results (e.g. generated by the `rssto-llm` crate)
|
||||
20
crates/llm/Cargo.toml
Normal file
20
crates/llm/Cargo.toml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "rssto-llm"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
description = "LLM daemon for the rssto DB translations"
|
||||
keywords = ["rss", "llm", "translation", "localization", "server"]
|
||||
categories = ["command-line-utilities", "parsing", "text-processing", "value-formatting"]
|
||||
repository = "https://github.com/YGGverse/rssto"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.100"
|
||||
chrono = "0.4.42"
|
||||
clap = { version = "4.5.54", features = ["derive"] }
|
||||
lancor = "0.1.1"
|
||||
log = "0.4.29"
|
||||
mysql = { package = "rssto-mysql", version = "0.1.0", path = "../mysql" }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
|
||||
21
crates/llm/LICENSE
Normal file
21
crates/llm/LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2026 YGGverse
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
28
crates/llm/README.md
Normal file
28
crates/llm/README.md
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# rssto-llm
|
||||
|
||||
LLM daemon for the rssto DB translations
|
||||
|
||||
> [!NOTE]
|
||||
> In development!
|
||||
|
||||
1. Setup `rssto-crawler` first and collect initial data
|
||||
|
||||
2. Run LLM server:
|
||||
|
||||
```
|
||||
llama-server -hf ggml-org/gemma-3-1b-it-GGUF`
|
||||
```
|
||||
|
||||
3. Launch `rssto-llm` to handle `content` DB:
|
||||
|
||||
```
|
||||
cd rssto/crates/rssto-llm
|
||||
cargo run -- --mysql-username {USER} \
|
||||
--mysql-password {PASS} \
|
||||
--mysql-database {NAME} \
|
||||
--llm-host {HOST} \
|
||||
--llm-port {PORT} \
|
||||
--llm-model {MODEL} \
|
||||
--llm-message {MESSAGE}
|
||||
```
|
||||
* see `--help` to display all supported options
|
||||
37
crates/llm/src/argument.rs
Normal file
37
crates/llm/src/argument.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
use clap::Parser;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
pub struct Argument {
|
||||
// LLM
|
||||
#[arg(long, default_value_t = String::from("http"))]
|
||||
pub llm_scheme: String,
|
||||
#[arg(long, default_value_t = String::from("localhost"))]
|
||||
pub llm_host: String,
|
||||
#[arg(long, default_value_t = 8080)]
|
||||
pub llm_port: u16,
|
||||
|
||||
/// Model name (e.g. `INSAIT-Institute/MamayLM-Gemma-2-9B-IT-v0.1-GGUF` or `ggml-org/gemma-3-1b-it-GGUF`)
|
||||
#[arg(long)]
|
||||
pub llm_model: String,
|
||||
|
||||
/// Initial message for the `content` subject (e.g. `translate to...`)
|
||||
#[arg(long)]
|
||||
pub llm_message: String,
|
||||
|
||||
// Database
|
||||
#[arg(long, default_value_t = String::from("localhost"))]
|
||||
pub mysql_host: String,
|
||||
#[arg(long, default_value_t = 3306)]
|
||||
pub mysql_port: u16,
|
||||
#[arg(long)]
|
||||
pub mysql_username: String,
|
||||
#[arg(long)]
|
||||
pub mysql_password: String,
|
||||
#[arg(long)]
|
||||
pub mysql_database: String,
|
||||
/// Loop update in seconds
|
||||
/// * None to exit on complete
|
||||
#[arg(long, short)]
|
||||
pub update: Option<u64>,
|
||||
}
|
||||
103
crates/llm/src/main.rs
Normal file
103
crates/llm/src/main.rs
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
mod argument;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
use argument::Argument;
|
||||
use chrono::Local;
|
||||
use clap::Parser;
|
||||
use lancor::{ChatCompletionRequest, LlamaCppClient, Message};
|
||||
use log::{debug, info};
|
||||
use mysql::{Mysql, Sort};
|
||||
use std::env::var;
|
||||
|
||||
if var("RUST_LOG").is_ok() {
|
||||
use tracing_subscriber::{EnvFilter, fmt::*};
|
||||
struct T;
|
||||
impl time::FormatTime for T {
|
||||
fn format_time(&self, w: &mut format::Writer<'_>) -> std::fmt::Result {
|
||||
write!(w, "{}", Local::now())
|
||||
}
|
||||
}
|
||||
fmt()
|
||||
.with_timer(T)
|
||||
.with_env_filter(EnvFilter::from_default_env())
|
||||
.init()
|
||||
}
|
||||
|
||||
let arg = Argument::parse();
|
||||
let db = Mysql::connect(
|
||||
&arg.mysql_host,
|
||||
arg.mysql_port,
|
||||
&arg.mysql_username,
|
||||
&arg.mysql_password,
|
||||
&arg.mysql_database,
|
||||
)?;
|
||||
let llm = LlamaCppClient::new(format!(
|
||||
"{}://{}:{}",
|
||||
arg.llm_scheme, arg.llm_host, arg.llm_port
|
||||
))?;
|
||||
|
||||
let provider_id = match db.provider_by_name(&arg.llm_model)? {
|
||||
Some(p) => {
|
||||
debug!(
|
||||
"Use existing DB provider #{} matches model name `{}`",
|
||||
p.provider_id, &arg.llm_model
|
||||
);
|
||||
p.provider_id
|
||||
}
|
||||
None => {
|
||||
let provider_id = db.insert_provider(&arg.llm_model)?;
|
||||
info!(
|
||||
"Provider `{}` not found in database, created new one with ID `{provider_id}`",
|
||||
&arg.llm_model
|
||||
);
|
||||
provider_id
|
||||
}
|
||||
};
|
||||
|
||||
info!("Daemon started");
|
||||
loop {
|
||||
debug!("New queue begin...");
|
||||
for source in db.contents_queue_for_provider_id(provider_id, Sort::Asc, None)? {
|
||||
debug!(
|
||||
"Begin generating `content_id` #{} using `provider_id` #{provider_id}.",
|
||||
source.content_id
|
||||
);
|
||||
|
||||
let title =
|
||||
llm.chat_completion(ChatCompletionRequest::new(&arg.llm_model).message(
|
||||
Message::user(format!("{}\n{}", arg.llm_message, source.title)),
|
||||
))
|
||||
.await?;
|
||||
|
||||
println!("{}", &title.choices[0].message.content);
|
||||
|
||||
let description =
|
||||
llm.chat_completion(ChatCompletionRequest::new(&arg.llm_model).message(
|
||||
Message::user(format!("{}\n{}", arg.llm_message, source.description)),
|
||||
))
|
||||
.await?;
|
||||
|
||||
let content_id = db.insert_content(
|
||||
source.channel_item_id,
|
||||
Some(provider_id),
|
||||
&title.choices[0].message.content,
|
||||
&description.choices[0].message.content,
|
||||
)?;
|
||||
|
||||
debug!(
|
||||
"Created `content_id` #{content_id} using `content_id` #{} source by `provider_id` #{provider_id}.",
|
||||
source.content_id
|
||||
)
|
||||
}
|
||||
debug!("Queue completed");
|
||||
if let Some(update) = arg.update {
|
||||
debug!("Wait {update} seconds to continue...");
|
||||
std::thread::sleep(std::time::Duration::from_secs(update))
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ impl Mysql {
|
|||
|
||||
pub fn contents_total_by_provider_id(&self, provider_id: Option<u64>) -> Result<usize, Error> {
|
||||
let total: Option<usize> = self.pool.get_conn()?.exec_first(
|
||||
"SELECT COUNT(*) FROM `content` WHERE `provider_id` = ?",
|
||||
"SELECT COUNT(*) FROM `content` WHERE `provider_id` <=> ?",
|
||||
(provider_id,),
|
||||
)?;
|
||||
Ok(total.unwrap_or(0))
|
||||
|
|
@ -125,12 +125,35 @@ impl Mysql {
|
|||
`channel_item_id`,
|
||||
`provider_id`,
|
||||
`title`,
|
||||
`description` FROM `content` WHERE `provider_id` = ? ORDER BY `content_id` {sort} LIMIT {}",
|
||||
`description` FROM `content` WHERE `provider_id` <=> ? ORDER BY `content_id` {sort} LIMIT {}",
|
||||
limit.unwrap_or(DEFAULT_LIMIT)
|
||||
),
|
||||
(provider_id, ))
|
||||
}
|
||||
|
||||
/// Get subjects for `rssto-llm` queue
|
||||
pub fn contents_queue_for_provider_id(
|
||||
&self,
|
||||
provider_id: u64,
|
||||
sort: Sort,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Content>, Error> {
|
||||
self.pool.get_conn()?.exec(
|
||||
format!(
|
||||
"SELECT `c1`.`content_id`,
|
||||
`c1`.`channel_item_id`,
|
||||
`c1`.`provider_id`,
|
||||
`c1`.`title`,
|
||||
`c1`.`description`
|
||||
FROM `content` AS `c1` WHERE `c1`.`provider_id` IS NULL AND NOT EXISTS (
|
||||
SELECT NULL FROM `content` AS `c2` WHERE `c2`.`channel_item_id` = `c1`.`channel_item_id` AND `c2`.`provider_id` = ? LIMIT 1
|
||||
) ORDER BY `c1`.`content_id` {sort} LIMIT {}",
|
||||
limit.unwrap_or(DEFAULT_LIMIT)
|
||||
),
|
||||
(provider_id,),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn contents_by_channel_item_id_provider_id(
|
||||
&self,
|
||||
channel_item_id: u64,
|
||||
|
|
@ -143,7 +166,7 @@ impl Mysql {
|
|||
`channel_item_id`,
|
||||
`provider_id`,
|
||||
`title`,
|
||||
`description` FROM `content` WHERE `channel_item_id` = ? AND `provider_id` = ? LIMIT {}",
|
||||
`description` FROM `content` WHERE `channel_item_id` = ? AND `provider_id` <=> ? LIMIT {}",
|
||||
limit.unwrap_or(DEFAULT_LIMIT)
|
||||
),
|
||||
(channel_item_id, provider_id),
|
||||
|
|
@ -154,8 +177,8 @@ impl Mysql {
|
|||
&self,
|
||||
channel_item_id: u64,
|
||||
provider_id: Option<u64>,
|
||||
title: String,
|
||||
description: String,
|
||||
title: &str,
|
||||
description: &str,
|
||||
) -> Result<u64, Error> {
|
||||
let mut c = self.pool.get_conn()?;
|
||||
c.exec_drop(
|
||||
|
|
@ -164,6 +187,21 @@ impl Mysql {
|
|||
)?;
|
||||
Ok(c.last_insert_id())
|
||||
}
|
||||
|
||||
pub fn provider_by_name(&self, name: &str) -> Result<Option<Provider>, Error> {
|
||||
self.pool.get_conn()?.exec_first(
|
||||
"SELECT `provider_id`,
|
||||
`name`
|
||||
FROM `provider` WHERE `name` = ?",
|
||||
(name,),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn insert_provider(&self, name: &str) -> Result<u64, Error> {
|
||||
let mut c = self.pool.get_conn()?;
|
||||
c.exec_drop("INSERT INTO `provider` SET `name` = ?", (name,))?;
|
||||
Ok(c.last_insert_id())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||
|
|
@ -194,6 +232,12 @@ pub struct Content {
|
|||
pub description: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, FromRow)]
|
||||
pub struct Provider {
|
||||
pub provider_id: u64,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
pub enum Sort {
|
||||
Asc,
|
||||
Desc,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue