1use super::super::core::*;
2use super::super::users::*;
3use crate::fs::FS_PATH;
4use iroh_docs::sync::Entry;
5use iroh_docs::AuthorId;
6use log::error;
7use native_db::*;
8use native_model::{native_model, Model};
9use serde::{Deserialize, Serialize};
10use std::hash::{Hash, Hasher};
11use std::{
12 collections::{HashMap, HashSet},
13 path::PathBuf,
14 str::FromStr,
15 sync::{Arc, LazyLock},
16 time::SystemTime,
17};
18use tantivy::{
19 directory::MmapDirectory,
20 schema::{Field, Schema, Value, FAST, STORED, TEXT},
21 Directory, Index, IndexReader, IndexWriter, TantivyDocument, Term,
22};
23use tokio::sync::Mutex;
24use url::Url;
25
26pub(crate) static POST_INDEX_PATH: LazyLock<PathBuf> =
27 LazyLock::new(|| PathBuf::from(FS_PATH).join("POST_INDEX"));
28pub(crate) static POST_SCHEMA: LazyLock<(Schema, HashMap<&str, Field>)> = LazyLock::new(|| {
29 let mut schema_builder = Schema::builder();
30 let fields = HashMap::from([
31 ("id", schema_builder.add_bytes_field("id", STORED)),
32 (
33 "author_id",
34 schema_builder.add_text_field("author_id", TEXT | STORED),
35 ),
36 ("path", schema_builder.add_text_field("path", TEXT | STORED)),
37 ("url", schema_builder.add_text_field("url", TEXT | STORED)),
38 (
39 "title",
40 schema_builder.add_text_field("title", TEXT | STORED),
41 ),
42 ("body", schema_builder.add_text_field("body", TEXT | STORED)),
43 ("tag", schema_builder.add_text_field("tag", TEXT | STORED)),
44 (
45 "timestamp",
46 schema_builder.add_date_field("timestamp", FAST),
47 ),
48 ]);
49 let schema = schema_builder.build();
50 (schema, fields)
51});
52pub(crate) static POST_INDEX: LazyLock<Index> = LazyLock::new(|| {
53 if let Err(e) = std::fs::create_dir_all(&*POST_INDEX_PATH) {
54 error!("{e}");
55 }
56 let mmap_directory: Box<dyn Directory> =
57 Box::new(MmapDirectory::open(&*POST_INDEX_PATH).unwrap());
58 Index::open_or_create(mmap_directory, POST_SCHEMA.0.clone()).unwrap()
59});
60pub(crate) static POST_INDEX_READER: LazyLock<IndexReader> =
61 LazyLock::new(|| POST_INDEX.reader().unwrap());
62pub(crate) static POST_INDEX_WRITER: LazyLock<Arc<Mutex<IndexWriter>>> =
63 LazyLock::new(|| Arc::new(Mutex::new(POST_INDEX.writer(50_000_000).unwrap())));
64
/// Database model for a post: pairs an [`Entry`] with the [`OkuNote`] describing it.
///
/// The primary key is produced by `OkuPost::primary_key` and consists of the entry's author
/// bytes and the entry's key bytes.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[native_model(id = 2, version = 2)]
#[native_db(
    primary_key(primary_key -> (Vec<u8>, Vec<u8>))
)]
pub struct OkuPost {
    /// The entry this post belongs to; its author and key identify the post.
    pub entry: Entry,
    /// The note (URL, title, body, tags) associated with the entry.
    pub note: OkuNote,
}
77
78impl PartialEq for OkuPost {
79 fn eq(&self, other: &Self) -> bool {
80 self.primary_key() == other.primary_key()
81 }
82}
83impl Eq for OkuPost {}
84impl Hash for OkuPost {
85 fn hash<H: Hasher>(&self, state: &mut H) {
86 self.primary_key().hash(state);
87 }
88}
89
90impl From<OkuPost> for TantivyDocument {
91 fn from(value: OkuPost) -> Self {
92 let post_key: [Vec<u8>; 2] = value.primary_key().into();
93 let post_key_bytes = post_key.concat();
94
95 let mut doc = TantivyDocument::default();
96 doc.add_bytes(POST_SCHEMA.1["id"], &post_key_bytes);
97 doc.add_text(
98 POST_SCHEMA.1["author_id"],
99 crate::fs::util::fmt(value.entry.author()),
100 );
101 doc.add_text(
102 POST_SCHEMA.1["path"],
103 String::from_utf8_lossy(value.entry.key()),
104 );
105 doc.add_text(POST_SCHEMA.1["url"], value.note.url.to_string());
106 doc.add_text(POST_SCHEMA.1["title"], value.note.title);
107 doc.add_text(POST_SCHEMA.1["body"], value.note.body);
108 for tag in value.note.tags {
109 doc.add_text(POST_SCHEMA.1["tag"], tag);
110 }
111 doc.add_date(
112 POST_SCHEMA.1["timestamp"],
113 tantivy::DateTime::from_timestamp_micros(value.entry.timestamp() as i64),
114 );
115 doc
116 }
117}
118
119impl TryFrom<TantivyDocument> for OkuPost {
120 type Error = anyhow::Error;
121
122 fn try_from(value: TantivyDocument) -> Result<Self, Self::Error> {
123 let author_id = AuthorId::from_str(
124 value
125 .get_first(POST_SCHEMA.1["author_id"])
126 .ok_or(anyhow::anyhow!("No author ID for document in index … "))?
127 .as_str()
128 .ok_or(anyhow::anyhow!("No author ID for document in index … "))?,
129 )?;
130 let path = value
131 .get_first(POST_SCHEMA.1["path"])
132 .ok_or(anyhow::anyhow!("No path for document in index … "))?
133 .as_str()
134 .ok_or(anyhow::anyhow!("No path for document in index … "))?
135 .to_string();
136 DATABASE
137 .get_post(&author_id, &path.clone().into())
138 .ok()
139 .flatten()
140 .ok_or(anyhow::anyhow!(
141 "No post with author {} and path {} found … ",
142 author_id,
143 path
144 ))
145 }
146}
147
148impl OkuPost {
149 pub(crate) fn primary_key(&self) -> (Vec<u8>, Vec<u8>) {
150 (
151 self.entry.author().as_bytes().to_vec(),
152 self.entry.key().to_vec(),
153 )
154 }
155
156 pub(crate) fn index_term(&self) -> Term {
157 let post_key: [Vec<u8>; 2] = self.primary_key().into();
158 let post_key_bytes = post_key.concat();
159 Term::from_field_bytes(POST_SCHEMA.1["id"], &post_key_bytes)
160 }
161
162 pub fn user(&self) -> OkuUser {
164 match DATABASE.get_user(&self.entry.author()).ok().flatten() {
165 Some(user) => user,
166 None => OkuUser {
167 author_id: self.entry.author(),
168 last_fetched: SystemTime::now(),
169 posts: vec![self.entry.clone()],
170 identity: None,
171 },
172 }
173 }
174}
175
/// A note attached to a URL, with a title, a body and a set of tags.
///
/// The URL determines the note's post, embedding and archive paths (see the `*_path`
/// methods on this type).
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct OkuNote {
    /// The URL the note is keyed by; all derived paths are computed from it.
    pub url: Url,
    /// The note's title.
    pub title: String,
    /// The note's body text.
    pub body: String,
    /// The note's tags; stored as a set, so duplicates collapse.
    pub tags: HashSet<String>,
}
188
189impl OkuNote {
190 pub fn post_path(&self) -> String {
192 Self::post_path_from_url(&self.url.to_string())
193 }
194
195 pub fn post_path_from_url(url: &String) -> String {
197 format!("/posts/{}.toml", bs58::encode(url.as_bytes()).into_string())
198 }
199
200 pub fn embedding_path(&self) -> String {
202 Self::embedding_path_from_url(&self.url.to_string())
203 }
204
205 pub fn archive_path(&self) -> String {
207 Self::archive_path_from_url(&self.url.to_string())
208 }
209
210 pub fn embedding_path_from_url(url: &String) -> String {
212 format!(
213 "/embeddings/{}.json",
214 bs58::encode(url.as_bytes()).into_string()
215 )
216 }
217
218 pub fn archive_path_from_url(url: &String) -> String {
220 format!("/archives/{}", bs58::encode(url.as_bytes()).into_string())
221 }
222}