commit f429492afe605d05b43aad2ba6fe9bc499a989f9
parent e65eee6a7ca08e9563bafa3c7d0879e54b1c1578
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Sat, 8 Jan 2022 21:32:58 -0800
Refactor
Diffstat:
M src/lib.rs | 66 +++++++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 43 insertions(+), 23 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
@@ -39,8 +39,6 @@ struct EnrichedTextParser<'a> {
cursor: usize,
data: &'a [u8],
in_tag: bool,
- CRLF: bool,
- LT: bool,
}
// escape only <
@@ -50,8 +48,6 @@ impl<'a> EnrichedTextParser<'a> {
cursor: 0,
data: s,
in_tag: false,
- CRLF: false,
- LT: false,
};
}
}
@@ -65,7 +61,11 @@ impl<'a> Iterator for EnrichedTextParser<'a> {
}
// awkard
if self.in_tag {
- while self.data[self.cursor] != b'>' && self.cursor < self.data.len() {
+ if self.data[self.cursor] == b'<' {
+ self.cursor += 1
+ }
+ let start = self.cursor;
+ while self.cursor < self.data.len() && self.data[self.cursor] != b'>' {
self.cursor += 1;
}
let tag = &self.data[start..self.cursor];
@@ -99,20 +99,20 @@ impl<'a> Iterator for EnrichedTextParser<'a> {
}
}
loop {
- if self.cursor == self.data.len() || self.cursor > start {
- println!("{}", self.cursor);
+ if self.cursor >= self.data.len() || self.in_tag {
let mut out = Vec::new();
- let data = &self.data[start..self.cursor];
+ self.cursor -= 1;
+ let data = &self.data[start..=self.cursor];
let mut crlf_count = 0;
- let mut skip = true;
+ let mut skip = false;
for (idx, window) in data.windows(2).enumerate() {
if skip {
skip = false;
continue;
}
- if crlf_count >= 1 {
+ if window[0] != b'\r' && crlf_count >= 1 {
if crlf_count >= 2 {
- for _ in 1..crlf_count - 1 {
+ for _ in 0..crlf_count - 1 {
out.push(b'\r');
out.push(b'\n');
}
@@ -123,21 +123,33 @@ impl<'a> Iterator for EnrichedTextParser<'a> {
}
if window[0] == b'\r' && window[1] == b'\n' {
crlf_count += 1;
- } else if window[0] == b'<' && window[1] == b'<' {
skip = true;
+ } else if window[0] == b'<' && window[1] == b'<' {
+ continue;
} else {
out.push(window[0]);
}
+ if idx == data.len() - 2 {
+ out.push(window[1]);
+ }
}
- return Some(Token::Text(out));
- }
- if self.data[self.cursor] == b'<' {
- if self.cursor < self.data.len() - 1 && self.data[self.cursor + 1] == b'<' {
- } else if self.data[self.cursor] == b'<' {
- self.in_tag = true;
+ self.cursor += 1;
+ if out.len() > 0 {
+ return Some(Token::Text(out));
}
+ return self.next();
+ }
+ if (self.cursor + 1 < self.data.len()
+ && self.data[self.cursor] == b'<'
+ && self.data[self.cursor + 1] == b'<')
+ {
+ self.cursor += 2;
+ }
+ if self.cursor < self.data.len() && self.data[self.cursor] == b'<' {
+ self.in_tag = true;
+ } else {
+ self.cursor += 1;
}
- self.cursor += 1;
}
}
}
@@ -155,6 +167,13 @@ mod tests {
}
#[test]
+ fn test_empty() {
+ let data = b"";
+ let p = EnrichedTextParser::new(data);
+ assert_eq!(p.collect::<Vec<Token>>(), vec![]);
+ }
+
+ #[test]
fn test_tag() {
let data = b"Hello <Bold>world!</Bold>";
let p = EnrichedTextParser::new(data);
@@ -187,12 +206,13 @@ mod tests {
}
#[test]
fn test_incomplete_tag() {
- let data = b"Stay <cool>cool";
+ let data = b"Stay</broken><cool>cool";
let p = EnrichedTextParser::new(data);
assert_eq!(
p.collect::<Vec<Token>>(),
vec![
- Token::Text(b"Stay ".to_vec()),
+ Token::Text(b"Stay".to_vec()),
+ Token::End(Tag::Unrecognized),
Token::Start(Tag::Unrecognized),
Token::Text(b"cool".to_vec()),
]
@@ -200,11 +220,11 @@ mod tests {
}
#[test]
fn test_escapes() {
- let data = b"Hello\r\nWorld<<Universe";
+ let data = b"Hello\r\nWorld<<\r\n\r\nUniverse\n";
let p = EnrichedTextParser::new(data);
assert_eq!(
p.collect::<Vec<Token>>(),
- vec![Token::Text(b"Hello World<Universe".to_vec()),]
+ vec![Token::Text(b"Hello World<\r\nUniverse\n".to_vec()),]
);
}
}