nanohtml2text

Lightweight html to text converter in Rust
git clone git://git.alexwennerberg.com/nanohtml2text
Log | Files | Refs | README | LICENSE

commit 8ad91b23d6caddd1c5b627f8060c99a71ce9cea9
parent 06549d688cea183d3c4918aafe2219ac442a18a2
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Mon,  3 Jan 2022 08:57:38 -0800

Update README, get started

Diffstat:
A Cargo.lock  |  7 +++++++
M Cargo.toml  |  4 +---
A README      | 13 +++++++++++++
D README.md   |  3 ---
A src/lib.rs  |  1 +
M src/main.rs | 32 ++++++++++++++++++++++++++++++++
6 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "nanohtml2text"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,8 +1,6 @@
 [package]
-name = "html2text-lite"
+name = "nanohtml2text"
 version = "0.1.0"
 edition = "2018"
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
diff --git a/README b/README
@@ -0,0 +1,13 @@
+nanohtml2text
+=============
+
+0-dependency library to convert HTML to text; an alternative to https://crates.io/crates/html2text that doesn't use a full browser-grade HTML parser
+
+Based on https://github.com/k3a/html2text -- basically a line-for-line rewreite
+
+Useful for displaying HTML emails
+
+Usage:
+
+let s = "<b>Hacker mode</b>"
+html2text(s);
diff --git a/README.md b/README.md
@@ -1,3 +0,0 @@
-# HTML to text
-
-Like html2text, but doesn't require a full browser-grade html parser
diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1 @@
+fn html2text() {}
diff --git a/src/main.rs b/src/main.rs
@@ -1,3 +1,35 @@
+// almost a line for line rewrite of https://github.com/k3a/html2text/blob/master/html2text.go
 fn main() {
     println!("Hello, world!");
 }
+
+fn write_space(s: &mut String) {}
+
+fn html2text(input: &str) -> String {
+    let in_len = input.len();
+    let mut tag_start = 0;
+    let mut in_ent = false;
+    let mut bad_tag_stack_depth = 0;
+    let mut should_output = true;
+    let mut can_print_new_line = false;
+    let mut out_buf = String::new();
+    for (i, r) in input.chars().enumerate() {
+        if in_len > 0 && i == in_len - 1 {
+            can_print_new_line = false
+        }
+        if r.is_whitespace() {
+            if should_output && bad_tag_stack_depth == 0 && !in_ent {
+                write_space(&mut out_buf);
+            }
+            continue;
+        } else if r == ';' && in_ent {
+            in_ent = false;
+            continue;
+        } else if r == '&' && should_output {
+            let mut ent_name = String::new();
+            in_ent = false;
+            // parse the entity name, max 10 chars
+        }
+    }
+    out_buf
+}