Skip to main content

oku_fs/database/posts/
core.rs

1use super::super::core::*;
2use super::super::users::*;
3use crate::fs::FS_PATH;
4use iroh_docs::sync::Entry;
5use iroh_docs::AuthorId;
6use log::error;
7use native_db::*;
8use native_model::{native_model, Model};
9use serde::{Deserialize, Serialize};
10use std::hash::{Hash, Hasher};
11use std::{
12    collections::{HashMap, HashSet},
13    path::PathBuf,
14    str::FromStr,
15    sync::{Arc, LazyLock},
16    time::SystemTime,
17};
18use tantivy::{
19    directory::MmapDirectory,
20    schema::{Field, Schema, Value, FAST, STORED, TEXT},
21    Directory, Index, IndexReader, IndexWriter, TantivyDocument, Term,
22};
23use tokio::sync::Mutex;
24use url::Url;
25
26pub(crate) static POST_INDEX_PATH: LazyLock<PathBuf> =
27    LazyLock::new(|| PathBuf::from(FS_PATH).join("POST_INDEX"));
28pub(crate) static POST_SCHEMA: LazyLock<(Schema, HashMap<&str, Field>)> = LazyLock::new(|| {
29    let mut schema_builder = Schema::builder();
30    let fields = HashMap::from([
31        ("id", schema_builder.add_bytes_field("id", STORED)),
32        (
33            "author_id",
34            schema_builder.add_text_field("author_id", TEXT | STORED),
35        ),
36        ("path", schema_builder.add_text_field("path", TEXT | STORED)),
37        ("url", schema_builder.add_text_field("url", TEXT | STORED)),
38        (
39            "title",
40            schema_builder.add_text_field("title", TEXT | STORED),
41        ),
42        ("body", schema_builder.add_text_field("body", TEXT | STORED)),
43        ("tag", schema_builder.add_text_field("tag", TEXT | STORED)),
44        (
45            "timestamp",
46            schema_builder.add_date_field("timestamp", FAST),
47        ),
48    ]);
49    let schema = schema_builder.build();
50    (schema, fields)
51});
52pub(crate) static POST_INDEX: LazyLock<Index> = LazyLock::new(|| {
53    if let Err(e) = std::fs::create_dir_all(&*POST_INDEX_PATH) {
54        error!("{e}");
55    }
56    let mmap_directory: Box<dyn Directory> =
57        Box::new(MmapDirectory::open(&*POST_INDEX_PATH).unwrap());
58    Index::open_or_create(mmap_directory, POST_SCHEMA.0.clone()).unwrap()
59});
60pub(crate) static POST_INDEX_READER: LazyLock<IndexReader> =
61    LazyLock::new(|| POST_INDEX.reader().unwrap());
62pub(crate) static POST_INDEX_WRITER: LazyLock<Arc<Mutex<IndexWriter>>> =
63    LazyLock::new(|| Arc::new(Mutex::new(POST_INDEX.writer(50_000_000).unwrap())));
64
#[derive(Serialize, Deserialize, Debug, Clone)]
#[native_model(id = 2, version = 2)]
#[native_db(
    primary_key(primary_key -> (Vec<u8>, Vec<u8>))
)]
/// An OkuNet post.
///
/// Pairs the `iroh_docs` entry recording a version of the post file with the note that
/// makes up the post's content. The database primary key is produced by
/// [`OkuPost::primary_key`] (the author ID bytes and the entry key bytes); equality and
/// hashing of posts use that same key rather than the post's content.
pub struct OkuPost {
    /// A record of a version of the post file.
    pub entry: Entry,
    /// The content of the post on OkuNet.
    pub note: OkuNote,
}
77
78impl PartialEq for OkuPost {
79    fn eq(&self, other: &Self) -> bool {
80        self.primary_key() == other.primary_key()
81    }
82}
// Marker impl: equality is defined by `PartialEq` above, which compares primary keys.
impl Eq for OkuPost {}
84impl Hash for OkuPost {
85    fn hash<H: Hasher>(&self, state: &mut H) {
86        self.primary_key().hash(state);
87    }
88}
89
90impl From<OkuPost> for TantivyDocument {
91    fn from(value: OkuPost) -> Self {
92        let post_key: [Vec<u8>; 2] = value.primary_key().into();
93        let post_key_bytes = post_key.concat();
94
95        let mut doc = TantivyDocument::default();
96        doc.add_bytes(POST_SCHEMA.1["id"], &post_key_bytes);
97        doc.add_text(
98            POST_SCHEMA.1["author_id"],
99            crate::fs::util::fmt(value.entry.author()),
100        );
101        doc.add_text(
102            POST_SCHEMA.1["path"],
103            String::from_utf8_lossy(value.entry.key()),
104        );
105        doc.add_text(POST_SCHEMA.1["url"], value.note.url.to_string());
106        doc.add_text(POST_SCHEMA.1["title"], value.note.title);
107        doc.add_text(POST_SCHEMA.1["body"], value.note.body);
108        for tag in value.note.tags {
109            doc.add_text(POST_SCHEMA.1["tag"], tag);
110        }
111        doc.add_date(
112            POST_SCHEMA.1["timestamp"],
113            tantivy::DateTime::from_timestamp_micros(value.entry.timestamp() as i64),
114        );
115        doc
116    }
117}
118
119impl TryFrom<TantivyDocument> for OkuPost {
120    type Error = anyhow::Error;
121
122    fn try_from(value: TantivyDocument) -> Result<Self, Self::Error> {
123        let author_id = AuthorId::from_str(
124            value
125                .get_first(POST_SCHEMA.1["author_id"])
126                .ok_or(anyhow::anyhow!("No author ID for document in index … "))?
127                .as_str()
128                .ok_or(anyhow::anyhow!("No author ID for document in index … "))?,
129        )?;
130        let path = value
131            .get_first(POST_SCHEMA.1["path"])
132            .ok_or(anyhow::anyhow!("No path for document in index … "))?
133            .as_str()
134            .ok_or(anyhow::anyhow!("No path for document in index … "))?
135            .to_string();
136        DATABASE
137            .get_post(&author_id, &path.clone().into())
138            .ok()
139            .flatten()
140            .ok_or(anyhow::anyhow!(
141                "No post with author {} and path {} found … ",
142                author_id,
143                path
144            ))
145    }
146}
147
148impl OkuPost {
149    pub(crate) fn primary_key(&self) -> (Vec<u8>, Vec<u8>) {
150        (
151            self.entry.author().as_bytes().to_vec(),
152            self.entry.key().to_vec(),
153        )
154    }
155
156    pub(crate) fn index_term(&self) -> Term {
157        let post_key: [Vec<u8>; 2] = self.primary_key().into();
158        let post_key_bytes = post_key.concat();
159        Term::from_field_bytes(POST_SCHEMA.1["id"], &post_key_bytes)
160    }
161
162    /// Obtain the author of this post from the OkuNet database.
163    pub fn user(&self) -> OkuUser {
164        match DATABASE.get_user(&self.entry.author()).ok().flatten() {
165            Some(user) => user,
166            None => OkuUser {
167                author_id: self.entry.author(),
168                last_fetched: SystemTime::now(),
169                posts: vec![self.entry.clone()],
170                identity: None,
171            },
172        }
173    }
174}
175
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
/// A note left by an Oku user regarding some URL-addressed content.
///
/// The note's URL also determines where its post, embedding and archive are stored:
/// each of those paths is derived from the base58 encoding of the URL string.
pub struct OkuNote {
    /// The URL the note is regarding.
    pub url: Url,
    /// The title of the note.
    pub title: String,
    /// The body of the note.
    pub body: String,
    /// A list of tags associated with the note.
    // Stored as a set, so a given tag appears at most once.
    pub tags: HashSet<String>,
}
188
189impl OkuNote {
190    /// Generate a post path for the note.
191    pub fn post_path(&self) -> String {
192        Self::post_path_from_url(&self.url.to_string())
193    }
194
195    /// Generate a post path using a URL.
196    pub fn post_path_from_url(url: &String) -> String {
197        format!("/posts/{}.toml", bs58::encode(url.as_bytes()).into_string())
198    }
199
200    /// Generate an embedding path for the note.
201    pub fn embedding_path(&self) -> String {
202        Self::embedding_path_from_url(&self.url.to_string())
203    }
204
205    /// Generate an archive path for the note.
206    pub fn archive_path(&self) -> String {
207        Self::archive_path_from_url(&self.url.to_string())
208    }
209
210    /// Generate an embedding path using a URL.
211    pub fn embedding_path_from_url(url: &String) -> String {
212        format!(
213            "/embeddings/{}.json",
214            bs58::encode(url.as_bytes()).into_string()
215        )
216    }
217
218    /// Generate an archive path using a URL.
219    pub fn archive_path_from_url(url: &String) -> String {
220        format!("/archives/{}", bs58::encode(url.as_bytes()).into_string())
221    }
222}