commit b65548e2ca3ef706571ce0c4e28042e0df7448be
parent 8ad91b23d6caddd1c5b627f8060c99a71ce9cea9
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Mon, 3 Jan 2022 09:57:11 -0800
rewriting more code
Diffstat:
M | src/main.rs | | | 46 | ++++++++++++++++++++++++++++++++++++++++------ |
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/src/main.rs b/src/main.rs
@@ -3,17 +3,24 @@ fn main() {
println!("Hello, world!");
}
-fn write_space(s: &mut String) {}
+fn parse_html_entity(ent_name: &str) {}
-fn html2text(input: &str) -> String {
- let in_len = input.len();
+fn write_space(s: &mut String) {
+ let b = s.as_bytes();
+ if b.len() > 0 && b[b.len() - 1] != b' ' {
+ s.push(' ');
+ }
+}
+
+fn html2text(html: &str) -> String {
+ let in_len = html.len();
let mut tag_start = 0;
let mut in_ent = false;
let mut bad_tag_stack_depth = 0;
let mut should_output = true;
let mut can_print_new_line = false;
let mut out_buf = String::new();
- for (i, r) in input.chars().enumerate() {
+ for (i, r) in html.chars().enumerate() {
if in_len > 0 && i == in_len - 1 {
can_print_new_line = false
}
@@ -27,8 +34,35 @@ fn html2text(input: &str) -> String {
continue;
} else if r == '&' && should_output {
let mut ent_name = String::new();
- in_ent = false;
- // parse the entity name, max 10 chars
+ let mut is_ent = false;
+ let mut chars = 10;
+ for er in html[i + 1..].chars() {
+ if er == ';' {
+ is_ent = true;
+ break;
+ } else {
+ ent_name.push(er);
+ }
+ chars += 1;
+ if chars == 10 {
+ break;
+ }
+ }
+ if is_ent {
+ // parseHTMLentity TODO
+ }
+ } else if r == '<' {
+ // start of tag
+ tag_start = i + 1;
+ should_output = false;
+ continue;
+ } else if r == '>' { // end of tag
+ // TODO
+ }
+
+ if should_output && bad_tag_stack_depth == 0 && !in_ent {
+ can_print_new_line = true;
+ out_buf.push(r);
}
}
out_buf