commit e65eee6a7ca08e9563bafa3c7d0879e54b1c1578
parent 63fdfbdbbbc6d80c3fa9dbd8558b6ce57e365322
Author: alex wennerberg <alex@alexwennerberg.com>
Date: Sat, 8 Jan 2022 20:12:20 -0800
WIP
Diffstat:
M | src/lib.rs | | | 93 | +++++++++++++++++++++++++++++++++++++------------------------------------------ |
1 file changed, 44 insertions(+), 49 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
@@ -2,11 +2,9 @@
//! https://datatracker.ietf.org/doc/html/rfc1523
#[derive(Debug, Eq, PartialEq)]
-pub enum Token<'a> {
+pub enum Token {
// returns bytes in order to be encoding-agnostic
- Text(&'a [u8]),
- CRLF,
- LT,
+ Text(Vec<u8>),
End(Tag),
Start(Tag),
}
@@ -59,23 +57,13 @@ impl<'a> EnrichedTextParser<'a> {
}
impl<'a> Iterator for EnrichedTextParser<'a> {
- type Item = Token<'a>;
- fn next(&mut self) -> Option<Token<'a>> {
+ type Item = Token;
+ fn next(&mut self) -> Option<Token> {
let start = self.cursor;
if self.data.len() == self.cursor {
return None;
}
// awkward
- if self.CRLF {
- self.CRLF = false;
- self.cursor += 1;
- return Some(Token::CRLF);
- }
- if self.LT {
- self.LT = false;
- self.cursor += 1;
- return Some(Token::LT);
- }
if self.in_tag {
while self.data[self.cursor] != b'>' && self.cursor < self.data.len() {
self.cursor += 1;
@@ -111,32 +99,45 @@ impl<'a> Iterator for EnrichedTextParser<'a> {
}
}
loop {
- if self.cursor == self.data.len() {
- return Some(Token::Text(&self.data[start..self.cursor]));
- }
- if self.data[self.cursor] == b'\r'
- && self.cursor < self.data.len() + 1
- && self.data[self.cursor + 1] == b'\n'
- {
- self.CRLF = true;
+ if self.cursor == self.data.len() || self.cursor > start {
+ println!("{}", self.cursor);
+ let mut out = Vec::new();
+ let data = &self.data[start..self.cursor];
+ let mut crlf_count = 0;
+ let mut skip = true;
+ for (idx, window) in data.windows(2).enumerate() {
+ if skip {
+ skip = false;
+ continue;
+ }
+ if crlf_count >= 1 {
+ if crlf_count >= 2 {
+ for _ in 1..crlf_count - 1 {
+ out.push(b'\r');
+ out.push(b'\n');
+ }
+ } else {
+ out.push(b' ');
+ }
+ crlf_count = 0;
+ }
+ if window[0] == b'\r' && window[1] == b'\n' {
+ crlf_count += 1;
+ } else if window[0] == b'<' && window[1] == b'<' {
+ skip = true;
+ } else {
+ out.push(window[0]);
+ }
+ }
+ return Some(Token::Text(out));
}
if self.data[self.cursor] == b'<' {
if self.cursor < self.data.len() - 1 && self.data[self.cursor + 1] == b'<' {
- self.LT = true;
} else if self.data[self.cursor] == b'<' {
self.in_tag = true;
}
- self.cursor += 1;
- if self.cursor - 1 > start {
- return Some(Token::Text(&self.data[start..self.cursor - 1]));
- } else {
- return self.next();
- }
}
self.cursor += 1;
- if self.CRLF || self.LT {
- return Some(Token::Text(&self.data[start..self.cursor - 1]));
- }
}
}
}
@@ -150,7 +151,7 @@ mod tests {
fn test_simple() {
let data = b"Hello world!";
let p = EnrichedTextParser::new(data);
- assert_eq!(p.collect::<Vec<Token>>(), vec![Token::Text(data)]);
+ assert_eq!(p.collect::<Vec<Token>>(), vec![Token::Text(data.to_vec())]);
}
#[test]
@@ -160,9 +161,9 @@ mod tests {
assert_eq!(
p.collect::<Vec<Token>>(),
vec![
- Token::Text(b"Hello "),
+ Token::Text(b"Hello ".to_vec()),
Token::Start(Tag::Bold),
- Token::Text(b"world!"),
+ Token::Text(b"world!".to_vec()),
Token::End(Tag::Bold)
]
);
@@ -174,11 +175,11 @@ mod tests {
assert_eq!(
p.collect::<Vec<Token>>(),
vec![
- Token::Text(b"Hello "),
+ Token::Text(b"Hello ".to_vec()),
Token::Start(Tag::Indent),
- Token::Text(b"beautiful "),
+ Token::Text(b"beautiful ".to_vec()),
Token::Start(Tag::Bold),
- Token::Text(b"world!"),
+ Token::Text(b"world!".to_vec()),
Token::End(Tag::Bold),
Token::End(Tag::Indent)
]
@@ -191,9 +192,9 @@ mod tests {
assert_eq!(
p.collect::<Vec<Token>>(),
vec![
- Token::Text(b"Stay "),
+ Token::Text(b"Stay ".to_vec()),
Token::Start(Tag::Unrecognized),
- Token::Text(b"cool"),
+ Token::Text(b"cool".to_vec()),
]
)
}
@@ -203,13 +204,7 @@ mod tests {
let p = EnrichedTextParser::new(data);
assert_eq!(
p.collect::<Vec<Token>>(),
- vec![
- Token::Text(b"Hello"),
- Token::CRLF,
- Token::Text(b"World"),
- Token::LT,
- Token::Text(b"Universe")
- ]
+ vec![Token::Text(b"Hello World<Universe".to_vec()),]
);
}
}