crabmail

Static HTML email archive viewer in Rust
git clone git://git.alexwennerberg.com/crabmail
Log | Files | Refs | README | LICENSE

commit e47eac8488273afd15cd045420f2131a94212250
parent 63019457f181a9299d8b24ee73f9b634c13503c1
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sun, 12 Dec 2021 12:34:01 -0800

cleanup code

Diffstat:
M README.md | 1 -
M crabmail/Cargo.lock | 20 ++++++++++++++++++++
M crabmail/Cargo.toml | 1 +
M crabmail/src/filters.rs | 59 +++++++++++++++++++++++++----------------------------------
M crabmail/src/main.rs | 140 +++++++++++++++++++++++++++++--------------------------------------------------
M crabmail/templates/thread.html | 8 ++++----
M crabmail/templates/threadlist.html | 8 ++++++--
7 files changed, 107 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md @@ -4,4 +4,3 @@ Crabmail is a set of tools for self-hosting mailing list archives. Each tool can * Crabmail: A static HTML email viewer * Crabmail-sync: A simple pull-based mbox sync -* Crabmail-admin: a web application for managing mailing lists diff --git a/crabmail/Cargo.lock b/crabmail/Cargo.lock @@ -144,6 +144,7 @@ dependencies = [ "anyhow", "askama", "mailparse", + "mbox-reader", "pico-args", ] @@ -314,12 +315,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] +name = "mbox-reader" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6231e973c0a8caceed71fac7355555012ba73fe230365989b298b36022e9e2ab" +dependencies = [ + "memmap", +] + +[[package]] name = "memchr" version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" [[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] name = "new_debug_unreachable" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/crabmail/Cargo.toml b/crabmail/Cargo.toml @@ -12,6 +12,7 @@ html = ["ammonia"] [dependencies] mailparse = "0.13" +mbox-reader = "0.2.0" #unamaintained, should remove dep pico-args = "0.4.1" askama = "0.10" anyhow = "1.0" diff --git a/crabmail/src/filters.rs b/crabmail/src/filters.rs @@ -1,38 +1,29 @@ use mailparse::{parse_mail, MailHeaderMap, ParsedMail}; -pub fn get_header(email: &&ParsedMail, header: &str) -> askama::Result<String> { - Ok(email - .headers - .get_first_value(header) - .unwrap_or("".to_string())) -} +// // NOTE this function is currently unsafe +// pub fn get_body(email: 
&&ParsedMail) -> Result<String> { +// let core_email = email.subparts.get(0).unwrap_or(email); -// NOTE this function is currently unsafe -pub fn get_body(email: &&ParsedMail) -> askama::Result<String> { - let core_email = email.subparts.get(0).unwrap_or(email); +// #[cfg(feature = "html")] +// { +// use ammonia; +// use std::collections::HashSet; +// use std::iter::FromIterator; +// // TODO dont initialize each time +// // TODO sanitize id, classes, etc. +// let tags = HashSet::from_iter(vec!["a", "b", "i", "br", "p", "span", "u"]); +// if core_email.ctype.mimetype == "text/html" { +// let a = ammonia::Builder::new() +// .tags(tags) +// .clean(&core_email.get_body().unwrap_or("".to_string())) +// .to_string(); +// return Ok(a); +// } +// } - #[cfg(feature = "html")] - { - use ammonia; - use std::collections::HashSet; - use std::iter::FromIterator; - // TODO dont initialize each time - // TODO sanitize id, classes, etc. - let tags = HashSet::from_iter(vec!["a", "b", "i", "br", "p", "span", "u"]); - if core_email.ctype.mimetype == "text/html" { - let a = ammonia::Builder::new() - .tags(tags) - .clean(&core_email.get_body().unwrap_or("".to_string())) - .to_string(); - return Ok(a); - } - } - - if core_email.ctype.mimetype == "text/plain" { - // TODO html escape this. - return Ok(core_email.get_body().unwrap_or("".to_string())); - } - return Ok(String::from("[No valid body found]")); -} - -// pub fn get_attachment(email: &&ParsedMail) -> askama::Result<String> {} +// if core_email.ctype.mimetype == "text/plain" { +// // TODO html escape this. 
+// return Ok(core_email.get_body().unwrap_or("".to_string())); +// } +// return Ok(String::from("[No valid body found]")); +// } diff --git a/crabmail/src/main.rs b/crabmail/src/main.rs @@ -1,7 +1,9 @@ -use anyhow::Result; +use anyhow::{Context, Result}; use askama::Template; use mailparse::{dateparse, parse_headers, parse_mail, MailHeaderMap, ParsedMail}; +use mbox_reader::MboxFile; use std::collections::HashMap; +use std::fmt; use std::fs::{File, OpenOptions}; use std::io::prelude::*; @@ -11,16 +13,47 @@ mod utils; const HELP: &str = "\ Usage: crabmail -TODO +-m --mbox input mbox file "; // TODO be more clear about the expected input types // maildi -#[derive(Debug)] -struct RawEmail { - date: i64, // unix - data: Vec<u8>, +// Not a "raw email" struct, but an email object that can be represented by +// crabmail. +struct Email { + // TODO allocs + id: String, + from: String, + subject: String, + in_reply_to: Option<String>, + date: i64, // unix epoch + body: String, + // raw_email: String, +} + +fn local_parse_email(data: &[u8]) -> Result<Email> { + let parsed_mail = parse_mail(data)?; + let headers = parsed_mail.headers; + let id = headers + .get_first_value("message-id") + .context("No message ID")?; + // Assume 1 in-reply-to header. a reasonable assumption + let in_reply_to = headers.get_first_value("in-reply-to"); + let subject = headers + .get_first_value("subject") + .unwrap_or("(no subject)".to_owned()); + let date = dateparse(&headers.get_first_value("date").context("No date header")?)?; + let from = headers.get_first_value("from").context("No from header")?; + let body = "lorem ipsum".to_owned(); + return Ok(Email { + id, + in_reply_to, + from, + subject, + date, + body, + }); } // TODO refactor @@ -35,88 +68,17 @@ fn main() -> Result<()> { let out_dir = pargs .opt_value_from_os_str(["-d", "--dir"], parse_path)? 
.unwrap_or("site".into()); - // this function doesnt do what I want - let in_mboxes = pargs.values_from_os_str(["-m", "--mail"], parse_path)?; - if in_mboxes.len() == 0 { - println!("Please provide an input folder"); - std::process::exit(1); - } - - // Maps thread msg id to all items in the thread - let mut threads: HashMap<String, Vec<RawEmail>> = HashMap::new(); - - for file in std::fs::read_dir(&in_mboxes[0])? { - // assuming one email per file for now - let mut buffer = Vec::new(); - let mut f = File::open(&file?.path())?; - f.read_to_end(&mut buffer)?; - let (headers, _) = parse_headers(&buffer)?; - let msg_id = headers - .get_first_value("message-id") - .unwrap_or(String::new()); // TODO error + let in_mbox = pargs.value_from_os_str(["-m", "--mbox"], parse_path)?; - // TOOD handle case where in reply to is not the root message of the thread - let in_reply_to = headers.get_first_value("in-reply-to"); - // Note that date can be forged by the client - let date = dateparse( - &headers - .get_first_value("date") - .unwrap_or(String::from("-1")), - )?; + let mbox = MboxFile::from_file(&in_mbox)?; - let message = RawEmail { - date: date, - data: buffer, - }; - - // TODO clean message id - match in_reply_to { - Some(irt) => { - if threads.get(&irt).is_none() { - threads.insert(irt, vec![message]); - } else { - threads.get_mut(&irt).unwrap().push(message); - } - } - None => { - threads.insert(msg_id, vec![message]); - } - } - } - - // sort items in each thread by date - for (_, value) in &mut threads { - value.sort_by(|a, b| a.date.cmp(&b.date)); - } + let mut mail_index: HashMap<String, Email> = HashMap::new(); + let mut reply_index: HashMap<String, String> = HashMap::new(); - // TODO generate thread list sorted by most recent email in thread - std::fs::create_dir(&out_dir).ok(); - let thread_dir = &out_dir.join("threads"); - std::fs::create_dir(thread_dir).ok(); - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - 
.open(out_dir.join("index.html"))?; - let thread_list = ThreadList { - // assumes first message chronologically is the root - messages: threads - .values() - .map(|t| parse_mail(&t[0].data).unwrap()) - .collect(), - }; - file.write(thread_list.render()?.as_bytes()).ok(); - // TODO prevent path traversal bug from ./.. in message id - for (key, value) in threads { - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(thread_dir.join(&key))?; - let thread = Thread { - messages: value.iter().map(|m| parse_mail(&m.data).unwrap()).collect(), - }; - file.write(thread.render()?.as_bytes()).ok(); + for entry in mbox.iter() { + let buffer = entry.message().unwrap(); + // unwrap or warn + let email = local_parse_email(buffer)?; } Ok(()) } @@ -127,13 +89,13 @@ fn parse_path(s: &std::ffi::OsStr) -> Result<std::path::PathBuf, &'static str> { #[derive(Template)] #[template(path = "thread.html")] -struct Thread<'a> { - messages: Vec<ParsedMail<'a>>, +struct Thread { + messages: Vec<Email>, } #[derive(Template)] #[template(path = "threadlist.html")] -struct ThreadList<'a> { +struct ThreadList { // message root - messages: Vec<ParsedMail<'a>>, + messages: Vec<Email>, } diff --git a/crabmail/templates/thread.html b/crabmail/templates/thread.html @@ -5,11 +5,11 @@ <div> <div class="message"> {% for message in messages %} - <h3>{{message|get_header("subject")}}</h3> - <b>From: </b>{{message|get_header("from")}}<br> - <b>Date: </b>{{message|get_header("date")}}<br> + <h3>{{message.subject}}</h3> + <b>From: </b>{{message.from}}<br> + <b>Date: </b>{{message.date}}<br> <div class="email-body"> - {{message|get_body|safe}} + {{message.body}} </div> {% endfor %} </div> diff --git a/crabmail/templates/threadlist.html b/crabmail/templates/threadlist.html @@ -3,9 +3,13 @@ {% block content %} <div class="page-title"><h1>Crabmail Mailing List</h1></div> <div> + <table> {% for message in messages %} - <a 
href="threads/{{message|get_header("message-id")}}">{{message|get_header("subject")}}</a> {{message|get_header("from")}} {{message|get_header("date")}} - <br> + <tr> + <td><a href="threads/{{message.id}}">{{message.subject}}</a></td> + <td> {{message.from}}</td> + <td>{{message.date}}</td> {% endfor %} + </table> </div> {% endblock %}