crabmail

Static HTML email archive viewer in Rust
git clone git://git.alexwennerberg.com/crabmail
Log | Files | Refs | README | LICENSE

commit edb3c8f47b2788687f08bc6823351229390103b3
parent 4d2c5c777a587721fb1f91e4e00bad1940cbeebb
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sat, 31 Jul 2021 16:48:52 -0700

Work on project skeleton

Diffstat:
MCargo.lock | 242++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
MCargo.toml | 1+
MREADME.md | 4++--
Asrc/filters.rs | 12++++++++++++
Msrc/main.rs | 152++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Msrc/utils.rs | 18+++++++++++++++---
Atemplates/base.html | 22++++++++++++++++++++++
Atemplates/fullmail.html | 0
Atemplates/shortmail.html | 0
Atemplates/thread.html | 15+++++++++++++++
Atemplates/threadlist.html | 11+++++++++++
11 files changed, 406 insertions(+), 71 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "anyhow" version = "1.0.40" @@ -7,6 +9,64 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" [[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "askama" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d298738b6e47e1034e560e5afe63aa488fea34e25ec11b855a76f0d7b8e73134" +dependencies = [ + "askama_derive", + "askama_escape", + "askama_shared", +] + +[[package]] +name = "askama_derive" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2925c4c290382f9d2fa3d1c1b6a63fa1427099721ecca4749b154cc9c25522" +dependencies = [ + "askama_shared", + "proc-macro2", + "syn", +] + +[[package]] +name = "askama_escape" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90c108c1a94380c89d2215d0ac54ce09796823cca0fd91b299cfff3b33e346fb" + +[[package]] +name = "askama_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2582b77e0f3c506ec4838a25fa8a5f97b9bed72bb6d3d272ea1c031d8bd373bc" +dependencies = [ + "askama_escape", + "humansize", + "nom", + "num-traits", + "percent-encoding", + "proc-macro2", + "quote", + "serde", + "syn", + "toml", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] name = "base64" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -22,6 +82,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" [[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] name = "byteorder" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -48,6 +126,7 @@ name = "crabmail" version = "0.1.0" dependencies = [ "anyhow", + "askama", "mailparse", "pico-args", ] @@ -62,10 +141,35 @@ dependencies = [ ] [[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + +[[package]] +name = "humansize" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + +[[package]] name = "mailparse" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62db73ff1a42b0e3a8858cf0d5c183bdfc23491f7294ae4a8200c83577457386" +checksum = "c06f526fc13a50f46a3689a6f438cb833c59817c898bb40a3954f341ddf74ce1" dependencies = [ "base64 0.13.0", "charset", @@ -73,13 +177,147 @@ dependencies = [ ] [[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "nom" +version = "6.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c5c51b9083a3c620fa67a2a635d1ce7d95b897e957d6b28ff9a5da960a103a6" +dependencies = [ + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] name = "pico-args" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d7afeb98c5a10e0bffcc7fc16e105b04d06729fac5fd6384aebf7ff5cb5a67d" [[package]] +name = "proc-macro2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] name = "quoted_printable" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1238256b09923649ec89b08104c4dfe9f6cb2fea734a5db5384e44916d59e9c5" + +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" diff --git a/Cargo.toml b/Cargo.toml @@ -11,4 +11,5 @@ mailparse = "0.13" # TODO feature flag html emails # ammonia = "3" pico-args = "0.4.1" +askama = "0.10" anyhow = "1.0" diff --git a/README.md b/README.md @@ -1,6 +1,6 @@ # crabmail An html mail archive, written in Rust. Similar to [Hypermail](https://github.com/hypermail-project/hypermail). -Notes: -* https://github.com/w3c/mailing-list-archives/issues/8 +THIS IS VERY EARLY IN DEVELOPMENT! +Probably you don't want to work on this until it is more complete diff --git a/src/filters.rs b/src/filters.rs @@ -0,0 +1,12 @@ +use mailparse::{parse_mail, MailHeaderMap, ParsedMail}; + +pub fn get_header(email: &&ParsedMail, header: &str) -> askama::Result<String> { + Ok(email + .headers + .get_first_value(header) + .unwrap_or("".to_string())) +} + +pub fn get_body(email: &&ParsedMail) -> askama::Result<String> { + Ok(email.get_body().unwrap_or("".to_string())) +} diff --git a/src/main.rs b/src/main.rs @@ -1,14 +1,16 @@ use anyhow::Result; -use mailparse::{parse_mail, MailHeaderMap, ParsedMail}; +use askama::Template; +use mailparse::{dateparse, parse_headers, parse_mail, MailHeaderMap, ParsedMail}; +use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::prelude::*; use std::path::Path; -use utils::EscapedHTML; +mod filters; mod utils; const HELP: &str = "\ -Usage: crabmail +Usage: crabmail (THIS STRING IS JUNK) FLAGS: -h, --help Prints this help information and exits. @@ -20,6 +22,16 @@ OPTIONS: -m, --mbox Mbox file, files, or directories to read in "; +// TODO be more clear about the expected input types +// maildi + +#[derive(Debug)] +struct RawEmail { + date: i64, // unix + data: Vec<u8>, +} + +// TODO refactor fn main() -> Result<()> { let mut pargs = pico_args::Arguments::from_env(); @@ -31,26 +43,81 @@ fn main() -> Result<()> { let out_dir = pargs .opt_value_from_os_str(["-d", "--dir"], parse_path)? .unwrap_or("site".into()); - // Create if does not exist - let in_mboxes = pargs.values_from_os_str(["-m", "--mbox"], parse_path)?; + // this function doesnt do what I want + let in_mboxes = pargs.values_from_os_str(["-m", "--mail"], parse_path)?; if in_mboxes.len() == 0 { - println!("Please provide one or more input files with the -m flag"); + println!("Please provide an input folder"); std::process::exit(1); } - for file in in_mboxes { + + // Maps thread msg id to all items in the thread + let mut threads: HashMap<String, Vec<RawEmail>> = HashMap::new(); + + for file in std::fs::read_dir(&in_mboxes[0])? { // assuming one email per file for now let mut buffer = Vec::new(); - let mut f = File::open(&file)?; + let mut f = File::open(&file?.path())?; f.read_to_end(&mut buffer)?; - let email = parse_mail(&buffer)?; - println!("{}", email_to_html(email)); + let (headers, _) = parse_headers(&buffer)?; + let msg_id = headers.get_first_value("message-id").unwrap(); // TODO error + let in_reply_to = headers.get_first_value("in-reply-to"); + // Note that date can be forged by the client + let date = dateparse( + &headers + .get_first_value("date") + .unwrap_or(String::from("-1")), + )?; + + let message = RawEmail { + date: date, + data: buffer, + }; + + // TODO clean message id + match in_reply_to { + Some(irt) => { + if threads.get(&irt).is_none() { + threads.insert(irt, vec![message]); + } else { + threads.get_mut(&irt).unwrap().push(message); + } + } + None => { + threads.insert(msg_id, vec![message]); + } + } } - if pargs.contains(["-t", "--threads"]) { - // TODO + + // sort items in each thread by date + for (key, mut value) in &mut threads { + value.sort_by(|a, b| a.date.cmp(&b.date)); } + // TODO generate thread list sorted by most recent email in thread std::fs::create_dir(&out_dir).ok(); - write_index(&out_dir)?; + let thread_dir = &out_dir.join("threads"); + std::fs::create_dir(thread_dir).ok(); + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(out_dir.join("index.html"))?; + let thread_list = ThreadList { + thread_ids: threads.keys().collect(), + }; + file.write(thread_list.render()?.as_bytes()); + // TODO prevent path traversal bug from ./.. in message id + for (key, value) in threads { + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(thread_dir.join(&key))?; + let thread = Thread { + messages: value.iter().map(|m| parse_mail(&m.data).unwrap()).collect(), + }; + file.write(thread.render()?.as_bytes()); + } Ok(()) } @@ -58,57 +125,14 @@ fn parse_path(s: &std::ffi::OsStr) -> Result<std::path::PathBuf, &'static str> { Ok(s.into()) } -fn email_to_html(email: ParsedMail) -> String { - // Probably if I was better at Rust I could rewrite these in a more efficient way, - // avoiding unnecessary allocs. Could use some of the lower-level features of - // the mailparse library - // - // could definitely improve the api here - let get_header_alloc = |f| email.headers.get_first_value(f).unwrap_or("".to_string()); - - return format!( - r#" -<b>From</b>: {from}<br> -<b>Subject</b>: {subject}<br> -<b>Date</b>: {date}<br> -<b>Message-Id</b>: {message_id} -<div id="body"> {body} </div> - "#, - from = EscapedHTML(&get_header_alloc("from")), - subject = EscapedHTML(&get_header_alloc("subject")), - date = EscapedHTML(&get_header_alloc("date")), - message_id = EscapedHTML(&get_header_alloc("message-id")), - // TODO replace with get body raw to avoid unneeded alloc. same w/ headers - body = EscapedHTML(&email.get_body().unwrap_or("".to_string())) - ); +#[derive(Template)] +#[template(path = "thread.html")] // using the template in this path, relative +struct Thread<'a> { + messages: Vec<ParsedMail<'a>>, } -// TODO set lang, title, etc -const HEADER: &[u8] = br#"<!DOCTYPE html> -<html> -<head> -<meta charset="utf-8"> -<link rel="stylesheet" type="text/css" href="/style.css"> -</head> -<body> -<main> -"#; - -const FOOTER: &[u8] = br#" -</main> -</body> -</html> -"#; - -// TODO write wrapper -fn write_index(out_dir: &Path) -> Result<()> { - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(out_dir.join("index.html"))?; - file.write_all(HEADER)?; - file.write_all(b"<h1>Hello world</h1>")?; - file.write_all(FOOTER)?; - Ok(()) +#[derive(Template)] +#[template(path = "threadlist.html")] // using the template in this path, relative +struct ThreadList<'a> { + thread_ids: Vec<&'a String>, } diff --git a/src/utils.rs b/src/utils.rs @@ -3,9 +3,7 @@ use std::io; use std::io::Write; // Derived from https://github.com/raphlinus/pulldown-cmark/blob/master/src/escape.rs -// Don't use single quotes (') in any of my attributes -// Homebrewing my html templating to minimize dependencies -// !!!WIP!!! -- still need to add tests, audit security, etc +// Don't use single quotes (') in any of your attributes const fn create_html_escape_table() -> [u8; 256] { let mut table = [0; 256]; @@ -20,6 +18,7 @@ static HTML_ESCAPE_TABLE: [u8; 256] = create_html_escape_table(); static HTML_ESCAPES: [&str; 5] = ["", "&quot;", "&amp;", "&lt;", "&gt;"]; +#[derive(Debug)] pub struct EscapedHTML<'a>(pub &'a str); impl fmt::Display for EscapedHTML<'_> { @@ -51,3 +50,16 @@ impl fmt::Display for EscapedHTML<'_> { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + assert_eq!( + format!("{}", EscapedHTML("<b>'hello&world\"</b>")), + "&lt;b&gt;'hello&amp;world&quot;&lt;/b&gt;".to_string() + ); + } +} diff --git a/templates/base.html b/templates/base.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="Permissions-Policy" content="interest-cohort=()"/> + <link rel="stylesheet" type="text/css" href="/style.css" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0,user-scalable=0" /> + <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📧</text></svg> + <meta name="description" content="Crabmail mailing list"> + <title>Crabmail Mailing List</title> + {% block head %}{% endblock %} + </head> + <body> + <div id="content"> + {% block content %}{% endblock %} + </div> + <hr class="thin"> + <div class="footer"> + Archive generated with <a href="https://git.alexwennerberg.com/crabmail">crabmail</a> + </div> + </body> +</html> diff --git a/templates/fullmail.html b/templates/fullmail.html diff --git a/templates/shortmail.html b/templates/shortmail.html diff --git a/templates/thread.html b/templates/thread.html @@ -0,0 +1,15 @@ +{% extends "base.html" %} + +{% block content %} + <div class="page-title"><h1>Some thread</h1></div> + <div> + <div class="message"> + {% for message in messages %} + <h3>{{message|get_header("subject")}}</h3> + <b>From: </b>{{message|get_header("from")}}<br> + <b>Date: </b>{{message|get_header("date")}}<br> + {{message|get_body}} + {% endfor %} + </div> + </div> +{% endblock %} diff --git a/templates/threadlist.html b/templates/threadlist.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} + +{% block content %} + <div class="page-title"><h1>Crabmail Mailing List</h1></div> + <div> + {% for thread in thread_ids %} + <a href="threads/{{thread}}">{{thread}}</a> + <br> + {% endfor %} + </div> +{% endblock %}