enriched-text

Rust parser for text/enriched as defined by RFC 1523
git clone git://git.alexwennerberg.com/enriched-text
Log | Files | Refs | README | LICENSE

commit f429492afe605d05b43aad2ba6fe9bc499a989f9
parent e65eee6a7ca08e9563bafa3c7d0879e54b1c1578
Author: alex wennerberg <alex@alexwennerberg.com>
Date:   Sat,  8 Jan 2022 21:32:58 -0800

Refactor

Diffstat:
Msrc/lib.rs | 66+++++++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs @@ -39,8 +39,6 @@ struct EnrichedTextParser<'a> { cursor: usize, data: &'a [u8], in_tag: bool, - CRLF: bool, - LT: bool, } // escape only < @@ -50,8 +48,6 @@ impl<'a> EnrichedTextParser<'a> { cursor: 0, data: s, in_tag: false, - CRLF: false, - LT: false, }; } } @@ -65,7 +61,11 @@ impl<'a> Iterator for EnrichedTextParser<'a> { } // awkard if self.in_tag { - while self.data[self.cursor] != b'>' && self.cursor < self.data.len() { + if self.data[self.cursor] == b'<' { + self.cursor += 1 + } + let start = self.cursor; + while self.cursor < self.data.len() && self.data[self.cursor] != b'>' { self.cursor += 1; } let tag = &self.data[start..self.cursor]; @@ -99,20 +99,20 @@ impl<'a> Iterator for EnrichedTextParser<'a> { } } loop { - if self.cursor == self.data.len() || self.cursor > start { - println!("{}", self.cursor); + if self.cursor >= self.data.len() || self.in_tag { let mut out = Vec::new(); - let data = &self.data[start..self.cursor]; + self.cursor -= 1; + let data = &self.data[start..=self.cursor]; let mut crlf_count = 0; - let mut skip = true; + let mut skip = false; for (idx, window) in data.windows(2).enumerate() { if skip { skip = false; continue; } - if crlf_count >= 1 { + if window[0] != b'\r' && crlf_count >= 1 { if crlf_count >= 2 { - for _ in 1..crlf_count - 1 { + for _ in 0..crlf_count - 1 { out.push(b'\r'); out.push(b'\n'); } @@ -123,21 +123,33 @@ impl<'a> Iterator for EnrichedTextParser<'a> { } if window[0] == b'\r' && window[1] == b'\n' { crlf_count += 1; - } else if window[0] == b'<' && window[1] == b'<' { skip = true; + } else if window[0] == b'<' && window[1] == b'<' { + continue; } else { out.push(window[0]); } + if idx == data.len() - 2 { + out.push(window[1]); + } } - return Some(Token::Text(out)); - } - if self.data[self.cursor] == b'<' { - if self.cursor < self.data.len() - 1 && self.data[self.cursor + 1] == b'<' { - } else if self.data[self.cursor] == b'<' { - self.in_tag = true; + self.cursor += 1; + if 
out.len() > 0 { + return Some(Token::Text(out)); } + return self.next(); + } + if (self.cursor + 1 < self.data.len() + && self.data[self.cursor] == b'<' + && self.data[self.cursor + 1] == b'<') + { + self.cursor += 2; + } + if self.cursor < self.data.len() && self.data[self.cursor] == b'<' { + self.in_tag = true; + } else { + self.cursor += 1; } - self.cursor += 1; } } } @@ -155,6 +167,13 @@ mod tests { } #[test] + fn test_empty() { + let data = b""; + let p = EnrichedTextParser::new(data); + assert_eq!(p.collect::<Vec<Token>>(), vec![]); + } + + #[test] fn test_tag() { let data = b"Hello <Bold>world!</Bold>"; let p = EnrichedTextParser::new(data); @@ -187,12 +206,13 @@ mod tests { } #[test] fn test_incomplete_tag() { - let data = b"Stay <cool>cool"; + let data = b"Stay</broken><cool>cool"; let p = EnrichedTextParser::new(data); assert_eq!( p.collect::<Vec<Token>>(), vec![ - Token::Text(b"Stay ".to_vec()), + Token::Text(b"Stay".to_vec()), + Token::End(Tag::Unrecognized), Token::Start(Tag::Unrecognized), Token::Text(b"cool".to_vec()), ] @@ -200,11 +220,11 @@ mod tests { } #[test] fn test_escapes() { - let data = b"Hello\r\nWorld<<Universe"; + let data = b"Hello\r\nWorld<<\r\n\r\nUniverse\n"; let p = EnrichedTextParser::new(data); assert_eq!( p.collect::<Vec<Token>>(), - vec![Token::Text(b"Hello World<Universe".to_vec()),] + vec![Token::Text(b"Hello World<\r\nUniverse\n".to_vec()),] ); } }