threading.rs (2476B) - raw
1 // Simple threading algorithm based on https://datatracker.ietf.org/doc/html/rfc8621 2 // Only threads based on subject 3 // An alternative is implementing https://www.jwz.org/doc/threading.html which is a PITA 4 // A thread is a collection of messages sorted by date. 5 // Assumes msg can be found on disk at `path` -- should be made more abstract to handle other mail 6 // stores 7 8 use mail_parser::parsers::fields::thread::thread_name; 9 use mail_parser::Message; 10 use std::collections::HashMap; 11 use std::path::PathBuf; 12 13 pub type MessageId = String; 14 15 pub struct Msg { 16 pub id: MessageId, 17 pub path: PathBuf, 18 pub time: i64, 19 } 20 21 impl Msg {} 22 23 #[derive(Default)] 24 pub struct ThreadIdx { 25 pub threads: Vec<Vec<Msg>>, 26 id_index: HashMap<MessageId, usize>, 27 subject_index: HashMap<String, usize>, 28 } 29 30 impl ThreadIdx { 31 pub fn new() -> Self { 32 ThreadIdx::default() 33 } 34 35 // Todo enumerate errors or something 36 // TODO should be format agnostic (use internal representation of email) 37 pub fn add_email(&mut self, msg: &Message, path: PathBuf) { 38 let msg_id = match msg.get_message_id() { 39 Some(m) => m, 40 None => return, 41 }; 42 let t = match msg 43 .get_received() 44 .as_datetime_ref() 45 .or_else(|| msg.get_date()) 46 { 47 Some(t) => t, 48 None => return, 49 }; 50 if self.id_index.get(msg_id).is_some() { 51 // TODO handle duplicate msg case. Don't allow overwrites 52 return; 53 } 54 // TODO fix unwrap 55 let time = t.to_timestamp().unwrap_or(-1); // todo unwrap. shouldnt occur. trying to change upstream https://github.com/stalwartlabs/mail-parser/pull/15 56 let thread_name = thread_name(msg.get_subject().unwrap_or("(No Subject)")); 57 58 let msg = Msg { 59 id: msg_id.to_owned(), 60 path, 61 time, 62 }; 63 let idx = self.subject_index.get(thread_name); 64 65 let id = match idx { 66 Some(i) => { 67 self.threads[*i].push(msg); 68 *i 69 } 70 None => { 71 self.threads.push(vec![msg]); 72 self.threads.len() - 1 73 } 74 }; 75 self.id_index.insert(msg_id.to_string(), id); 76 self.subject_index.insert(thread_name.to_string(), id); 77 } 78 79 pub fn finalize(&mut self) { 80 for t in &mut self.threads { 81 t.sort_by_key(|a| a.time); 82 } 83 } 84 }