gourami

[UNMAINTAINED] ActivityPub server in Rust
Log | Files | Refs | README | LICENSE

commit ba1c8128a4433105b59336440718bd6caf3240ac
parent d909e02a622208408365608fb8c1cc229f1b92f1
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sun, 19 Apr 2020 20:43:28 -0500

HTML sanitization and adding links to posts

Diffstat:
MCargo.lock | 212++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
MCargo.toml | 4+++-
MTODO | 4+++-
Msrc/db/status.rs | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/db/user.rs | 10++++++++++
Msrc/lib.rs | 7++++---
Mstatic/css/style.css | 8++++++--
Mtemplates/noteslist.html | 2++
Mtemplates/single_note.html | 12+++++++++---
Mtemplates/timeline.html | 2+-
10 files changed, 341 insertions(+), 19 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -36,6 +36,21 @@ dependencies = [ ] [[package]] +name = "ammonia" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89eac85170f4b3fb3dc5e442c1cfb036cb8eecf9dbbd431a161ffad15d90ea3b" +dependencies = [ + "html5ever", + "lazy_static", + "maplit", + "markup5ever_rcdom", + "matches", + "tendril", + "url", +] + +[[package]] name = "askama" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -406,6 +421,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] name = "futures" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -532,6 +557,7 @@ name = "gourami_social" version = "0.1.0" dependencies = [ "activitystreams", + "ammonia", "askama", "bcrypt", "chrono", @@ -542,6 +568,7 @@ dependencies = [ "lazy_static", "log 0.4.8", "rand 0.7.3", + "regex", "reqwest", "serde", "serde_json", @@ -603,6 +630,20 @@ dependencies = [ ] [[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log 0.4.8", + "mac", + "markup5ever", + "proc-macro2 1.0.10", + "quote 1.0.3", + "syn 1.0.17", +] + +[[package]] name = "http" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -816,6 +857,47 @@ dependencies = [ ] [[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + 
+[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "markup5ever" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" +dependencies = [ + "log 0.4.8", + "phf 0.8.0", + "phf_codegen 0.8.0", + "serde", + "serde_derive", + "serde_json", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + +[[package]] name = "matches" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -849,8 +931,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "216929a5ee4dd316b1702eedf5e74548c123d370f47841ceaac38ca154690ca3" dependencies = [ "mime 0.2.6", - "phf", - "phf_codegen", + "phf 0.7.24", + "phf_codegen 0.7.24", "unicase 1.4.2", ] @@ -943,6 +1025,12 @@ dependencies = [ ] [[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] name = "nom" version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1046,7 +1134,16 @@ version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" dependencies = [ - "phf_shared", + "phf_shared 0.7.24", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared 0.8.0", ] [[package]] @@ -1055,8 +1152,18 @@ version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.7.24", + "phf_shared 0.7.24", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", ] [[package]] @@ -1065,21 +1172,40 @@ version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" dependencies = [ - "phf_shared", + "phf_shared 0.7.24", "rand 0.6.5", ] [[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] name = "phf_shared" version = "0.7.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" dependencies = [ - "siphasher", + "siphasher 0.2.3", "unicase 1.4.2", ] [[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher 0.3.2", +] + +[[package]] name = "pin-project" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1124,6 +1250,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 
[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] name = "proc-macro-hack" version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1202,7 +1334,7 @@ dependencies = [ "rand_isaac", "rand_jitter", "rand_os", - "rand_pcg", + "rand_pcg 0.1.2", "rand_xorshift", "winapi 0.3.8", ] @@ -1218,6 +1350,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc 0.2.0", + "rand_pcg 0.2.1", ] [[package]] @@ -1327,6 +1460,15 @@ dependencies = [ ] [[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] name = "rand_xorshift" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1581,6 +1723,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" [[package]] +name = "siphasher" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23" + +[[package]] name = "slab" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1648,6 +1796,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" [[package]] +name = "string_cache" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared 0.8.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = 
"string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro2 1.0.10", + "quote 1.0.3", +] + +[[package]] name = "syn" version = "0.15.44" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1684,6 +1857,17 @@ dependencies = [ ] [[package]] +name = "tendril" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] name = "termcolor" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2184,3 +2368,15 @@ dependencies = [ "winapi 0.2.8", "winapi-build", ] + +[[package]] +name = "xml5ever" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59" +dependencies = [ + "log 0.4.8", + "mac", + "markup5ever", + "time 0.1.42", +] diff --git a/Cargo.toml b/Cargo.toml @@ -20,6 +20,8 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" tokio = { version = "0.2", features = ["macros"] } warp = "0.2" -hyper = "*" +hyper = "0.13" +regex = "1.3" +ammonia = "3" [dev-dependencies] diff --git a/TODO b/TODO @@ -1,4 +1,4 @@ -Plain text statuses +sanitize on write to db understand fn vs async fn in tokio @@ -44,6 +44,8 @@ used https://github.com/kaj/warp-diesel-ructe-sample Webfinger +Cite this https://git.cypr.io/oz/autolink-rust/src/branch/master/src/lib.rs + Profiles: Follow diff --git a/src/db/status.rs b/src/db/status.rs @@ -1,10 +1,13 @@ use chrono; +use std::collections::HashSet; use activitystreams::object::streams; use diesel::sqlite::SqliteConnection; use diesel::deserialize::{Queryable}; use super::schema::notes; use 
diesel::prelude::*; use serde::{Deserialize, Serialize}; +use regex::Regex; +use ammonia; // Statuses are note activitystream object @@ -28,3 +31,99 @@ pub struct NoteInput { pub content: String, // can we make this a slice? // pub published: chrono::NaiveDateTime, } + +impl Note { + pub fn parse_note_text(mut self) -> Self { + self.content = parse_note_text(&self.content); + self + } +} + +/// Parse links -- stolen from https://git.cypr.io/oz/autolink-rust/src/branch/master/src/lib.rs +fn parse_note_text(text: &str) -> String { + // dont hack me + let html_clean = ammonia::clean_text(text); + if text.len() == 0 { + return String::new(); + } + let re = Regex::new( + r"(?ix) + \b(([\w-]+:&#47;&#47;?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/))) + ", + ) + .unwrap(); + let replace_str = "<a href=\"$0\">$0</a>"; + let urls_parsed = re.replace_all(&html_clean, &replace_str as &str).to_string(); + let note_regex = Regex::new( + r"\B(📝|&gt;&gt;)(\d+)", + ).unwrap(); + let replace_str = "<a href=\"/note/$2\">$0</a>"; + let notes_parsed = note_regex.replace_all(&urls_parsed, &replace_str as &str).to_string(); + let person_regex = Regex::new( + r"\B(@)(\w+)").unwrap(); + let replace_str = "<a href=\"/user/$2\">$0</a>"; + let people_parsed = person_regex.replace_all(&notes_parsed, &replace_str as &str).to_string(); + // TODO get mentions too + return people_parsed; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_string() { + assert!(parse_note_text("") == "") + } + + fn test_escape_html() { + assert!(parse_note_text("<script>haxxor</script>hi>") == "hi") + } + + #[test] + fn test_string_without_urls() { + let src = "<p>Some HTML</p>"; + assert!(parse_note_text(src) == "Some HTML") + } + + #[test] + fn test_string_with_http_urls() { + let src = "Check this out: https://doc.rust-lang.org/\n + https://fr.wikipedia.org/wiki/Caf%C3%A9ine"; + let linked = "Check this out: <a href=\"https://doc.rust-lang.org/\">https://doc.rust-lang.org/</a>\n + 
<a href=\"https://fr.wikipedia.org/wiki/Caf%C3%A9ine\">https://fr.wikipedia.org/wiki/Caf%C3%A9ine</a>"; + assert!(parse_note_text(src) == linked) + } + + #[test] + fn test_string_with_mailto_urls() { + let src = "Send spam to mailto://oz@cypr.io"; + assert!( + parse_note_text(src) + == "Send spam to <a href=\"mailto://oz@cypr.io\">mailto://oz@cypr.io</a>" + ) + } + + #[test] + fn test_string_with_trailing_chars() { + let src = "I love https://cat-bounce.com!\n + Have you seen https://en.wikipedia.org/wiki/Cat_(disambiguation)?"; + let linked = "I love <a href=\"https://cat-bounce.com\">https://cat-bounce.com</a>!\n + Have you seen <a href=\"https://en.wikipedia.org/wiki/Cat_(disambiguation)\">https://en.wikipedia.org/wiki/Cat_(disambiguation)</a>?"; + assert!(parse_note_text(src) == linked) + } + + #[test] + fn test_user_replace() { + let src = "@joe whats up @sally"; + let linked = "<a href=\"/user/joe\">@joe</a> whats up <a href=\"/user/sally\">@sally</a>"; + assert!(parse_note_text(src) == linked) + } + + #[test] + fn test_note_replace() { + let src = "📝123 cool post >>456"; + let linked = "<a href=\"/note/123\">📝123</a> cool post <a href=\"/note/456\">&gt;&gt;456</a>"; + assert!(parse_note_text(src) == linked) + } +} diff --git a/src/db/user.rs b/src/db/user.rs @@ -54,3 +54,13 @@ pub struct NewUser<'a> { pub password: &'a str, pub email: &'a str, } + +// impl validate +fn validate_username() { +} + +fn validate_password() { +} + +fn validate_email() { +} diff --git a/src/lib.rs b/src/lib.rs @@ -242,10 +242,11 @@ fn render_timeline(session: Option<Session>) -> impl Reply { .limit(250) .load::<Note>(&POOL.get().unwrap()) .expect("Error loading posts"); + let parsed = results.into_iter().map(|n| n.parse_note_text()).collect(); render_template(&TimelineTemplate{ page: "timeline", global: global, - notes: results, + notes: parsed, }) } @@ -368,7 +369,7 @@ pub async fn run_server() { .and(form()) .map(do_login); - let do_login = path("logout") + let do_logout = 
path("logout") .and(session_filter()) .map(do_logout); @@ -394,7 +395,7 @@ pub async fn run_server() { // used for api based authentication // let api_filter = session::create_session_filter(&POOL.get()); let html_renders = home.or(login_page).or(register_page).or(user_page).or(note_page).or(server_info_page); - let forms = login_page.or(do_register).or(do_login).or(create_note).or(delete_note); + let forms = login_page.or(do_register).or(do_login).or(create_note).or(delete_note).or(do_logout); // let api // catch all for any other paths let not_found = warp::any().map(|| "404 not found"); diff --git a/static/css/style.css b/static/css/style.css @@ -9,6 +9,7 @@ text-decoration: underline; cursor: pointer; font-size: 1em; + font-family: "courier", monospace } .link-button:focus { outline: none; @@ -25,15 +26,18 @@ font-weight: 900; } +.bold { + font-weight: 900; +} .navbar { border: 1px solid black; font-size:2rem; } -.status { +.note { border-top: 1px solid black; border-bottom: 1px solid black; - margin: 1rem; + margin: .5rem; padding: 3px; } diff --git a/templates/noteslist.html b/templates/noteslist.html @@ -1,3 +1,5 @@ +<div class="container"> {% for note in notes %} {% include "single_note.html" %} {% endfor %} +</div> diff --git a/templates/single_note.html b/templates/single_note.html @@ -1,9 +1,15 @@ +<script> +function reply(note_id) +{ + document.getElementById("note_input").value='📝' + note_id + ' '; + document.getElementById("note_input").focus(); +}</script> <div class="row"> <div class="note"> - <a href="/note/{{note.id}}">>{{note.id}}</a> {{note.created_time}} <a - href="/user/{{note.creator_username}}">@{{note.creator_username}}</a> | {{note.content}} + <a href="/note/{{note.id}}">📝{{note.id}}</a> {{note.created_time}} <a class="bold" + href="/user/{{note.creator_username}}">@{{note.creator_username}}</a> ▶ {{note.content|safe}} {% if note.creator_id == global.user.id %} - <form method="post" action="/{{note.id}}/delete" class="inline"> + <a 
href="#" onclick="reply({{note.id}})">↪</a> <input type="hidden" name="extra_submit_param" value="extra_submit_value"> <button type="submit" name="submit_param" value="submit_value" class="link-button"> x diff --git a/templates/timeline.html b/templates/timeline.html @@ -4,7 +4,7 @@ <div class="container"> {% if global.logged_in %} <form action="/create_note" method="POST"> -<textarea name="note_input" rows=3 placeholder="note"></textarea> +<textarea id="note_input" name="note_input" rows=3 placeholder="note"></textarea> <br> <button id="post">create note</button> </form>