commit edb3c8f47b2788687f08bc6823351229390103b3
parent 4d2c5c777a587721fb1f91e4e00bad1940cbeebb
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Sat, 31 Jul 2021 16:48:52 -0700
Work on project skeleton
Diffstat:
11 files changed, 406 insertions(+), 71 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
+version = 3
+
[[package]]
name = "anyhow"
version = "1.0.40"
@@ -7,6 +9,64 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b"
[[package]]
+name = "arrayvec"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
+
+[[package]]
+name = "askama"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d298738b6e47e1034e560e5afe63aa488fea34e25ec11b855a76f0d7b8e73134"
+dependencies = [
+ "askama_derive",
+ "askama_escape",
+ "askama_shared",
+]
+
+[[package]]
+name = "askama_derive"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca2925c4c290382f9d2fa3d1c1b6a63fa1427099721ecca4749b154cc9c25522"
+dependencies = [
+ "askama_shared",
+ "proc-macro2",
+ "syn",
+]
+
+[[package]]
+name = "askama_escape"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90c108c1a94380c89d2215d0ac54ce09796823cca0fd91b299cfff3b33e346fb"
+
+[[package]]
+name = "askama_shared"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2582b77e0f3c506ec4838a25fa8a5f97b9bed72bb6d3d272ea1c031d8bd373bc"
+dependencies = [
+ "askama_escape",
+ "humansize",
+ "nom",
+ "num-traits",
+ "percent-encoding",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "syn",
+ "toml",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
+
+[[package]]
name = "base64"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -22,6 +82,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
+name = "bitflags"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
+
+[[package]]
+name = "bitvec"
+version = "0.19.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -48,6 +126,7 @@ name = "crabmail"
version = "0.1.0"
dependencies = [
"anyhow",
+ "askama",
"mailparse",
"pico-args",
]
@@ -62,10 +141,35 @@ dependencies = [
]
[[package]]
+name = "funty"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7"
+
+[[package]]
+name = "humansize"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026"
+
+[[package]]
+name = "lexical-core"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
+dependencies = [
+ "arrayvec",
+ "bitflags",
+ "cfg-if",
+ "ryu",
+ "static_assertions",
+]
+
+[[package]]
name = "mailparse"
-version = "0.13.4"
+version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62db73ff1a42b0e3a8858cf0d5c183bdfc23491f7294ae4a8200c83577457386"
+checksum = "c06f526fc13a50f46a3689a6f438cb833c59817c898bb40a3954f341ddf74ce1"
dependencies = [
"base64 0.13.0",
"charset",
@@ -73,13 +177,147 @@ dependencies = [
]
[[package]]
+name = "memchr"
+version = "2.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
+
+[[package]]
+name = "nom"
+version = "6.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c5c51b9083a3c620fa67a2a635d1ce7d95b897e957d6b28ff9a5da960a103a6"
+dependencies = [
+ "bitvec",
+ "funty",
+ "lexical-core",
+ "memchr",
+ "version_check",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
+
+[[package]]
name = "pico-args"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d7afeb98c5a10e0bffcc7fc16e105b04d06729fac5fd6384aebf7ff5cb5a67d"
[[package]]
+name = "proc-macro2"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
name = "quoted_printable"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1238256b09923649ec89b08104c4dfe9f6cb2fea734a5db5384e44916d59e9c5"
+
+[[package]]
+name = "radium"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8"
+
+[[package]]
+name = "ryu"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
+
+[[package]]
+name = "serde"
+version = "1.0.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.126"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "syn"
+version = "1.0.74"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
+[[package]]
+name = "toml"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "version_check"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
+
+[[package]]
+name = "wyz"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214"
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,4 +11,5 @@ mailparse = "0.13"
# TODO feature flag html emails
# ammonia = "3"
pico-args = "0.4.1"
+askama = "0.10"
anyhow = "1.0"
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
# crabmail
An html mail archive, written in Rust. Similar to [Hypermail](https://github.com/hypermail-project/hypermail).
-Notes:
-* https://github.com/w3c/mailing-list-archives/issues/8
+THIS IS VERY EARLY IN DEVELOPMENT!
+Probably you don't want to work on this until it is more complete
diff --git a/src/filters.rs b/src/filters.rs
@@ -0,0 +1,12 @@
+use mailparse::{parse_mail, MailHeaderMap, ParsedMail};
+
+pub fn get_header(email: &&ParsedMail, header: &str) -> askama::Result<String> {
+ Ok(email
+ .headers
+ .get_first_value(header)
+ .unwrap_or("".to_string()))
+}
+
+pub fn get_body(email: &&ParsedMail) -> askama::Result<String> {
+ Ok(email.get_body().unwrap_or("".to_string()))
+}
diff --git a/src/main.rs b/src/main.rs
@@ -1,14 +1,16 @@
use anyhow::Result;
-use mailparse::{parse_mail, MailHeaderMap, ParsedMail};
+use askama::Template;
+use mailparse::{dateparse, parse_headers, parse_mail, MailHeaderMap, ParsedMail};
+use std::collections::HashMap;
use std::fs::{File, OpenOptions};
use std::io::prelude::*;
use std::path::Path;
-use utils::EscapedHTML;
+mod filters;
mod utils;
const HELP: &str = "\
-Usage: crabmail
+Usage: crabmail (THIS STRING IS JUNK)
FLAGS:
-h, --help Prints this help information and exits.
@@ -20,6 +22,16 @@ OPTIONS:
-m, --mbox Mbox file, files, or directories to read in
";
+// TODO be more clear about the expected input types
+// maildi
+
+#[derive(Debug)]
+struct RawEmail {
+ date: i64, // unix
+ data: Vec<u8>,
+}
+
+// TODO refactor
fn main() -> Result<()> {
let mut pargs = pico_args::Arguments::from_env();
@@ -31,26 +43,81 @@ fn main() -> Result<()> {
let out_dir = pargs
.opt_value_from_os_str(["-d", "--dir"], parse_path)?
.unwrap_or("site".into());
- // Create if does not exist
- let in_mboxes = pargs.values_from_os_str(["-m", "--mbox"], parse_path)?;
+ // this function doesnt do what I want
+ let in_mboxes = pargs.values_from_os_str(["-m", "--mail"], parse_path)?;
if in_mboxes.len() == 0 {
- println!("Please provide one or more input files with the -m flag");
+ println!("Please provide an input folder");
std::process::exit(1);
}
- for file in in_mboxes {
+
+ // Maps thread msg id to all items in the thread
+ let mut threads: HashMap<String, Vec<RawEmail>> = HashMap::new();
+
+ for file in std::fs::read_dir(&in_mboxes[0])? {
// assuming one email per file for now
let mut buffer = Vec::new();
- let mut f = File::open(&file)?;
+ let mut f = File::open(&file?.path())?;
f.read_to_end(&mut buffer)?;
- let email = parse_mail(&buffer)?;
- println!("{}", email_to_html(email));
+ let (headers, _) = parse_headers(&buffer)?;
+ let msg_id = headers.get_first_value("message-id").unwrap(); // TODO error
+ let in_reply_to = headers.get_first_value("in-reply-to");
+ // Note that date can be forged by the client
+ let date = dateparse(
+ &headers
+ .get_first_value("date")
+ .unwrap_or(String::from("-1")),
+ )?;
+
+ let message = RawEmail {
+ date: date,
+ data: buffer,
+ };
+
+ // TODO clean message id
+ match in_reply_to {
+ Some(irt) => {
+ if threads.get(&irt).is_none() {
+ threads.insert(irt, vec![message]);
+ } else {
+ threads.get_mut(&irt).unwrap().push(message);
+ }
+ }
+ None => {
+ threads.insert(msg_id, vec![message]);
+ }
+ }
}
- if pargs.contains(["-t", "--threads"]) {
- // TODO
+
+ // sort items in each thread by date
+ for (key, mut value) in &mut threads {
+ value.sort_by(|a, b| a.date.cmp(&b.date));
}
+ // TODO generate thread list sorted by most recent email in thread
std::fs::create_dir(&out_dir).ok();
- write_index(&out_dir)?;
+ let thread_dir = &out_dir.join("threads");
+ std::fs::create_dir(thread_dir).ok();
+ let mut file = OpenOptions::new()
+ .create(true)
+ .write(true)
+ .truncate(true)
+ .open(out_dir.join("index.html"))?;
+ let thread_list = ThreadList {
+ thread_ids: threads.keys().collect(),
+ };
+ file.write(thread_list.render()?.as_bytes());
+ // TODO prevent path traversal bug from ./.. in message id
+ for (key, value) in threads {
+ let mut file = OpenOptions::new()
+ .create(true)
+ .write(true)
+ .truncate(true)
+ .open(thread_dir.join(&key))?;
+ let thread = Thread {
+ messages: value.iter().map(|m| parse_mail(&m.data).unwrap()).collect(),
+ };
+ file.write(thread.render()?.as_bytes());
+ }
Ok(())
}
@@ -58,57 +125,14 @@ fn parse_path(s: &std::ffi::OsStr) -> Result<std::path::PathBuf, &'static str> {
Ok(s.into())
}
-fn email_to_html(email: ParsedMail) -> String {
- // Probably if I was better at Rust I could rewrite these in a more efficient way,
- // avoiding unnecessary allocs. Could use some of the lower-level features of
- // the mailparse library
- //
- // could definitely improve the api here
- let get_header_alloc = |f| email.headers.get_first_value(f).unwrap_or("".to_string());
-
- return format!(
- r#"
-<b>From</b>: {from}<br>
-<b>Subject</b>: {subject}<br>
-<b>Date</b>: {date}<br>
-<b>Message-Id</b>: {message_id}
-<div id="body"> {body} </div>
- "#,
- from = EscapedHTML(&get_header_alloc("from")),
- subject = EscapedHTML(&get_header_alloc("subject")),
- date = EscapedHTML(&get_header_alloc("date")),
- message_id = EscapedHTML(&get_header_alloc("message-id")),
- // TODO replace with get body raw to avoid unneeded alloc. same w/ headers
- body = EscapedHTML(&email.get_body().unwrap_or("".to_string()))
- );
+#[derive(Template)]
+#[template(path = "thread.html")] // using the template in this path, relative
+struct Thread<'a> {
+ messages: Vec<ParsedMail<'a>>,
}
-// TODO set lang, title, etc
-const HEADER: &[u8] = br#"<!DOCTYPE html>
-<html>
-<head>
-<meta charset="utf-8">
-<link rel="stylesheet" type="text/css" href="/style.css">
-</head>
-<body>
-<main>
-"#;
-
-const FOOTER: &[u8] = br#"
-</main>
-</body>
-</html>
-"#;
-
-// TODO write wrapper
-fn write_index(out_dir: &Path) -> Result<()> {
- let mut file = OpenOptions::new()
- .create(true)
- .write(true)
- .truncate(true)
- .open(out_dir.join("index.html"))?;
- file.write_all(HEADER)?;
- file.write_all(b"<h1>Hello world</h1>")?;
- file.write_all(FOOTER)?;
- Ok(())
+#[derive(Template)]
+#[template(path = "threadlist.html")] // using the template in this path, relative
+struct ThreadList<'a> {
+ thread_ids: Vec<&'a String>,
}
diff --git a/src/utils.rs b/src/utils.rs
@@ -3,9 +3,7 @@ use std::io;
use std::io::Write;
// Derived from https://github.com/raphlinus/pulldown-cmark/blob/master/src/escape.rs
-// Don't use single quotes (') in any of my attributes
-// Homebrewing my html templating to minimize dependencies
-// !!!WIP!!! -- still need to add tests, audit security, etc
+// Don't use single quotes (') in any of your attributes
const fn create_html_escape_table() -> [u8; 256] {
let mut table = [0; 256];
@@ -20,6 +18,7 @@ static HTML_ESCAPE_TABLE: [u8; 256] = create_html_escape_table();
static HTML_ESCAPES: [&str; 5] = ["", "&quot;", "&amp;", "&lt;", "&gt;"];
+#[derive(Debug)]
pub struct EscapedHTML<'a>(pub &'a str);
impl fmt::Display for EscapedHTML<'_> {
@@ -51,3 +50,16 @@ impl fmt::Display for EscapedHTML<'_> {
Ok(())
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_add() {
+ assert_eq!(
+ format!("{}", EscapedHTML("<b>'hello&world\"</b>")),
+ "&lt;b&gt;'hello&amp;world&quot;&lt;/b&gt;".to_string()
+ );
+ }
+}
diff --git a/templates/base.html b/templates/base.html
@@ -0,0 +1,22 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <meta http-equiv="Permissions-Policy" content="interest-cohort=()"/>
+ <link rel="stylesheet" type="text/css" href="/style.css" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0,user-scalable=0" />
+ <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📧</text></svg>">
+ <meta name="description" content="Crabmail mailing list">
+ <title>Crabmail Mailing List</title>
+ {% block head %}{% endblock %}
+ </head>
+ <body>
+ <div id="content">
+ {% block content %}{% endblock %}
+ </div>
+ <hr class="thin">
+ <div class="footer">
+ Archive generated with <a href="https://git.alexwennerberg.com/crabmail">crabmail</a>
+ </div>
+ </body>
+</html>
diff --git a/templates/fullmail.html b/templates/fullmail.html
diff --git a/templates/shortmail.html b/templates/shortmail.html
diff --git a/templates/thread.html b/templates/thread.html
@@ -0,0 +1,15 @@
+{% extends "base.html" %}
+
+{% block content %}
+ <div class="page-title"><h1>Some thread</h1></div>
+ <div>
+ <div class="message">
+ {% for message in messages %}
+ <h3>{{message|get_header("subject")}}</h3>
+ <b>From: </b>{{message|get_header("from")}}<br>
+ <b>Date: </b>{{message|get_header("date")}}<br>
+ {{message|get_body}}
+ {% endfor %}
+ </div>
+ </div>
+{% endblock %}
diff --git a/templates/threadlist.html b/templates/threadlist.html
@@ -0,0 +1,11 @@
+{% extends "base.html" %}
+
+{% block content %}
+ <div class="page-title"><h1>Crabmail Mailing List</h1></div>
+ <div>
+ {% for thread in thread_ids %}
+ <a href="threads/{{thread}}">{{thread}}</a>
+ <br>
+ {% endfor %}
+ </div>
+{% endblock %}