crabmail

Static HTML email archive viewer in Rust
git clone git://git.alexwennerberg.com/crabmail
Log | Files | Refs | README | LICENSE

commit 30d322d2d071e0fb574da913b6739002ada8d7c2
parent 7763dbe7b90984fdd21e1fe941c91de32321251f
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Mon, 28 Mar 2022 18:52:58 -0700

Cleanup some bugz

Diffstat:
M README.md | 7 +++++++
M TODO | 3 +++
D src/jwzthreading.rs | 138 -------------------------------------------------------------------------------
M src/main.rs | 8 +++++++-
M src/templates/html.rs | 2 +-
M src/templates/mod.rs | 2 +-
M src/threading.rs | 26 +++++++++++++-------------
7 files changed, 32 insertions(+), 154 deletions(-)

diff --git a/README.md b/README.md @@ -45,6 +45,13 @@ If you want to use an mbox file (for example, to mirror another archive), use [mblaze](https://github.com/leahneukirchen/mblaze) to import it into a maildir. Mblaze also has some tools that you may find supplementary to crabmail. +For example: +``` +mkdir -p lists/mylist/cur lists/mylist/tmp lists/mylist/new +mdeliver -M lists/mylist < mylist.mbox +crabmail lists +``` + Open `site/index.html` in a web browser ## Contributing diff --git a/TODO b/TODO @@ -1,12 +1,15 @@ TODO ==== +fix threading algorithM!!!!!!!! atom get item href working on list fix bottom anchor/ latest link check for html escape bugz more thoroughly? URL encode spaces in links for gemini export +remove intermediate struct between message -> html. waste of time later ---- +Create optional user indexes that link to all their posts Duplicate ID verification: warn on duplicate ID, use first received-date. This is to prevent someone overwriting old emails secretly reference mblaze command, add examples to readme fix docs diff --git a/src/jwzthreading.rs b/src/jwzthreading.rs @@ -1,138 +0,0 @@ -// jwz threading https://www.jwz.org/doc/threading.html -// -// -// implementing this in Rust is a nightmare and makes me feel bad about myself so I am probably -// going to do something simpler - -use anyhow::{Context, Result}; -use mail_parser::parsers::fields::thread::thread_name; -use mail_parser::Message; -use std::cell::RefCell; -use std::collections::HashMap; -use std::fmt::Display; -use std::rc::{Rc, Weak}; - -#[derive(Default, Clone)] -struct JwzContainer { - message: Option<JwzMessage>, - parent: Option<MessageId>, - children: Vec<MessageId>, - next: Option<MessageId>, -} - -impl JwzContainer {} - -#[derive(Default, Clone)] -pub struct JwzMessage { - id: String, - subject: String, - references: Vec<String>, -} - -impl JwzMessage { - // TODO move out of here - pub fn parse(msg: Message) -> Result<Self> { - let id = msg - .get_message_id() - 
.context("Missing message ID")? - .to_owned(); - let subject = msg.get_subject().context("Missing subject")?.to_owned(); - let references = vec![]; - Ok(JwzMessage { - id, - subject, - references, - }) - } -} - -type MessageId = String; - -#[derive(Default, Clone)] -pub struct List { - id_table: HashMap<MessageId, JwzContainer>, - subject_table: HashMap<String, MessageId>, -} - -impl List { - pub fn new() -> Self { - List::default() - } - - // Todo enumerate errors or something - pub fn add_email(&mut self, jwz_msg: JwzMessage) { - let msg_id = jwz_msg.id.clone(); - let references = jwz_msg.references.clone(); - // 1A - if self - .id_table - .get(&msg_id) - .and_then(|c| Some(c.message.is_none())) - == Some(true) - { - let cont = self.id_table.get_mut(&msg_id).unwrap(); - cont.message = Some(jwz_msg) - } else { - let new_container = JwzContainer { - message: Some(jwz_msg), - ..Default::default() - }; - self.id_table.insert(msg_id.clone(), new_container); - } - // 1B - for pair in references.windows(2) { - // TODO check loop - let parent = self.container_from_id(&pair[0]); - if !parent.children.contains(&pair[1]) { - parent.children.push(pair[1].to_owned()); - } - let child = self.container_from_id(&pair[1]); - child.parent = Some(pair[0].to_owned()); - } - - // 1C - if references.len() > 0 { - let container = self.container_from_id(&msg_id); - container.parent = Some(references[references.len() - 1].clone()); - } - - // 2-4 - let root: Vec<&JwzContainer> = self - .id_table - .iter() - .filter_map(|(k, v)| { - if v.parent.is_none() { - return Some(v); - } - return None; - }) - .filter(|c| c.children.len() > 0) - // TODO Filter and promote if no message (4B) - .collect(); - - // 5 - for item in root { - // TODO If there is no message in the Container... - // ^^^ WHY WOULD THIS HAPPEN JWZ?? 
- if let Some(i) = &item.message { - let threadn = thread_name(&i.subject); - if threadn == "" { - continue; - } - } - } - } - - fn container_from_id(&mut self, msg_id: &str) -> &mut JwzContainer { - match self.id_table.get(msg_id) { - Some(c) => self.id_table.get_mut(msg_id).unwrap(), - None => { - self.id_table - .insert(msg_id.to_string(), JwzContainer::default()); - self.id_table.get_mut(msg_id).unwrap() - } - } - } - - pub fn finalize(&mut self) {} -} diff --git a/src/main.rs b/src/main.rs @@ -210,7 +210,13 @@ fn main() -> Result<()> { { let data = std::fs::read(f.path())?; // TODO move these 2 lines to dirreader - let msg = mail_parser::Message::parse(&data).context("Missing mail bytes")?; + let msg = match mail_parser::Message::parse(&data) { + Some(e) => e, + None => { + println!("Could not parse message {:?}", f.path()); + continue; + } + }; list.add_email(&msg, f.path().to_path_buf()); } list.finalize(); diff --git a/src/templates/html.rs b/src/templates/html.rs @@ -124,7 +124,7 @@ impl List { HEADER, r#" <h1 class="page-title"> - {title}<a href="atom.xml"><img alt="Atom feed" src='{rss_svg}' /></a> + {title} <a href="atom.xml"><img alt="Atom feed" src='{rss_svg}' /></a> </h1> {description}<br> <a href="{mailto:list_email}">{list_email}</a> diff --git a/src/templates/mod.rs b/src/templates/mod.rs @@ -3,4 +3,4 @@ pub mod html; pub mod util; pub mod xml; -const PAGE_SIZE: usize = 25; +const PAGE_SIZE: usize = 50; diff --git a/src/threading.rs b/src/threading.rs @@ -1,4 +1,6 @@ // Simple threading algorithm based on https://datatracker.ietf.org/doc/html/rfc8621 +// Only threads based on subject +// An alternative is implementing https://www.jwz.org/doc/threading.html which is a PITA // A thread is a collection of messages sorted by date. 
// Assumes msg can be found on disk at `path` -- should be made more abstract to handle other mail // stores @@ -34,18 +36,20 @@ impl ThreadIdx { // TODO should be format agnostic (use internal representation of email) pub fn add_email(&mut self, msg: &Message, path: PathBuf) { let msg_id = msg.get_message_id().unwrap(); // TODO unwrap - if self.id_index.get(msg_id).is_some() { - // TODO handle duplicate id case - panic!("duplicate msg id found") - } - let t = msg + let t = match msg .get_received() .as_datetime_ref() .or_else(|| msg.get_date()) - .unwrap(); // TODO fix unwrap + { + Some(t) => t, + None => return, + }; + if self.id_index.get(msg_id).is_some() { + // TODO handle duplicate msg case. Don't allow overwrites + return; + } + // TODO fix unwrap let time = t.to_timestamp().unwrap_or(-1); // todo unwrap. shouldnt occur. trying to change upstream https://github.com/stalwartlabs/mail-parser/pull/15 - let in_reply_to = msg.get_in_reply_to().as_text_ref(); - let last_reference = msg.get_in_reply_to().as_text_ref(); let thread_name = thread_name(msg.get_subject().unwrap_or("(No Subject)")); let msg = Msg { @@ -53,12 +57,8 @@ impl ThreadIdx { path, time, }; - let reference = in_reply_to.or_else(|| last_reference); + let idx = self.subject_index.get(thread_name); - let idx = match reference { - Some(id) => self.id_index.get(id), - None => self.subject_index.get(thread_name), - }; let id = match idx { Some(i) => { self.threads[*i].push(msg);