diff options
Diffstat (limited to 'src/rfc822.rs')
-rw-r--r-- | src/rfc822.rs | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/src/rfc822.rs b/src/rfc822.rs new file mode 100644 index 0000000..f6e9287 --- /dev/null +++ b/src/rfc822.rs @@ -0,0 +1,548 @@ +use chrono::{DateTime, NaiveDateTime, Utc}; +use mailparse::{addrparse_header, body::Body, dateparse, DispositionType, ParsedMail}; +use serde::{ser::SerializeSeq, Serialize, Serializer}; + +use crate::error::Error; + +#[derive(Serialize, Eq, Ord, Debug)] +pub struct MailAddr { + pub display_name: String, + pub address: String, +} + +impl PartialEq for MailAddr { + fn eq(&self, r: &Self) -> bool { + self.address == r.address + } +} + +impl PartialOrd for MailAddr { + fn partial_cmp(&self, r: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(r)) + } +} + +fn parse_mail_addrs( + inp: &mailparse::MailHeader, +) -> Result<Vec<MailAddr>, mailparse::MailParseError> { + let mut mal = addrparse_header(inp)?; + + Ok(mal + .drain(..) + .flat_map(|mail_addr| match mail_addr { + mailparse::MailAddr::Group(mut g) => g + .addrs + .drain(..) + .map(|s| MailAddr { + display_name: s.display_name.unwrap_or_default(), + address: s.addr, + }) + .collect(), + mailparse::MailAddr::Single(s) => vec![MailAddr { + display_name: s.display_name.unwrap_or_default(), + address: s.addr, + }], + }) + .collect()) +} + +// ---------------- + +fn serialize_date_time<S>(dt: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error> +where + S: Serializer, +{ + s.serialize_str(&dt.to_rfc3339()) +} + +fn serialize_sender<S>(oma: &Option<MailAddr>, s: S) -> Result<S::Ok, S::Error> +where + S: Serializer, +{ + if let Some(ma) = oma { + let mut seq = s.serialize_seq(Some(1))?; + seq.serialize_element(ma)?; + seq.end() + } else { + let seq = s.serialize_seq(Some(0))?; + seq.end() + } +} + +#[derive(Serialize, Debug)] +pub struct MailHeader { + #[serde(serialize_with = "serialize_date_time")] + #[serde(rename = "date")] + pub orig_date: DateTime<Utc>, + + // originator fields + pub from: Vec<MailAddr>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(serialize_with = "serialize_sender")] + pub sender: Option<MailAddr>, + #[serde(skip_serializing_if = "Option::is_none")] + reply_to: Option<Vec<MailAddr>>, + + // destination fields + #[serde(skip_serializing_if = "Vec::is_empty")] + to: Vec<MailAddr>, + #[serde(skip_serializing_if = "Vec::is_empty")] + cc: Vec<MailAddr>, + #[serde(skip_serializing_if = "Option::is_none")] + bcc: Option<Vec<MailAddr>>, + + /* identification fields + #[serde(skip_serializing_if = "String::is_empty")] + message_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + in_reply_to: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + references: Option<String>, + */ + // informational fields + pub subject: String, + #[serde(skip_serializing_if = "Vec::is_empty")] + comments: Vec<String>, + #[serde(skip_serializing_if = "Vec::is_empty")] + keywords: Vec<String>, + + mime: MIMEHeader, +} + +#[derive(Serialize, Debug)] +pub struct MIMEHeader { + #[serde(rename = "content_maintype")] + pub maintype: String, + #[serde(rename = "content_subtype")] + pub subtype: String, + #[serde(skip_serializing_if = "String::is_empty")] + pub content_disposition: String, + #[serde(skip_serializing_if = "String::is_empty")] + pub filename: String, +} + +enum ContentDisposition { + None, + Inline, + Attachment { filename: Option<String> }, +} + +#[derive(Serialize)] +pub struct MIMEPart { + pub head: MIMEHeader, + body: MailBody, +} + +#[derive(Serialize)] +#[serde(untagged)] +pub enum MailBody { + Discrete(String), + Multipart { + #[serde(skip_serializing_if = "String::is_empty")] + preamble: String, + parts: Vec<MIMEPart>, + #[serde(skip_serializing_if = "String::is_empty")] + epilogue: String, + }, + Message(Box<Mail>), +} + +#[derive(Serialize)] +pub struct Mail { + head: MailHeader, + pub body: MailBody, +} + +#[derive(Serialize, Debug)] +pub struct TopMailHeader { + byte_size: u64, + unread: bool, + #[serde(serialize_with = "serialize_date_time")] + pub date_received: DateTime<Utc>, + message_handle: String, + pub head: MailHeader, +} + +fn get_received(me: &mut maildir::MailEntry) -> i64 { + me.received().unwrap_or_else(|_| { + let mut id = me.id(); + id = &id[..id.find('.').unwrap()]; + id.parse().unwrap_or_default() + }) +} + +impl TryFrom<maildir::MailEntry> for TopMailHeader { + type Error = Error; + + fn try_from(mut me: maildir::MailEntry) -> Result<Self, Self::Error> { + Ok(TopMailHeader { + byte_size: me.path().metadata()?.len(), + unread: !me.is_seen(), + date_received: DateTime::<Utc>::from_utc( + NaiveDateTime::from_timestamp_opt(get_received(&mut me), 0).unwrap(), + Utc, + ), + message_handle: me.id().to_owned(), + head: parse_mail_header(&me.parsed()?)?, + }) + } +} + +pub fn parse_mail_content(v: &ParsedMail) -> Result<MIMEHeader, maildir::MailEntryError> { + let mut c = MIMEHeader { + maintype: String::new(), + subtype: String::new(), + content_disposition: String::new(), + filename: String::new(), + }; + + { + let mut val = v.ctype.mimetype.clone(); + if let Some(i) = val.find(';') { + val.truncate(i); + } + let j = val.find('/').unwrap(); + c.subtype = val.split_off(j + 1); + val.pop(); + c.maintype = val; + } + + match v.get_content_disposition().disposition { + DispositionType::Inline => c.content_disposition = "inline".to_owned(), + DispositionType::Attachment => { + c.content_disposition = "attachment".to_owned(); + if let Some(fname) = v.get_content_disposition().params.remove("filename") { + c.filename = fname; + } + } + _ => {} + } + + for h in &v.headers { + let mut key = h.get_key(); + let val = h.get_value(); + + key.make_ascii_lowercase(); + + match key.as_ref() { + "filename" => { + c.filename = val; + } + _ => {} + } + } + + Ok(c) +} + +fn parse_mail_header(pm: &ParsedMail) -> Result<MailHeader, maildir::MailEntryError> { + let v = &pm.headers; + + let mut mh = MailHeader { + orig_date: Utc::now(), + from: Vec::new(), + sender: None, + reply_to: None, + to: Vec::new(), + cc: Vec::new(), + bcc: None, + subject: String::new(), + comments: Vec::new(), + keywords: Vec::new(), + mime: MIMEHeader { + maintype: String::new(), + subtype: String::new(), + content_disposition: String::new(), + filename: String::new(), + }, + }; + + { + let mut val = pm.ctype.mimetype.clone(); + if let Some(i) = val.find(';') { + val.truncate(i); + } + let j = val.find('/').unwrap(); + mh.mime.subtype = val.split_off(j + 1); + val.pop(); + mh.mime.maintype = val; + } + + let mut key = String::new(); + + for y in v { + key.push_str(&y.get_key_ref()); + let mut val = y.get_value(); + + key.make_ascii_lowercase(); + + match key.as_str() { + "date" => { + mh.orig_date = DateTime::<Utc>::from_utc( + NaiveDateTime::from_timestamp_opt(dateparse(&val)?, 0).unwrap(), + Utc, + ) + } + "from" => { + if !mh.from.is_empty() { + return Err("from already set".into()); + } + mh.from = parse_mail_addrs(y)? + } + "sender" => mh.sender = parse_mail_addrs(y)?.drain(0..1).next(), + "reply-to" => mh.reply_to = Some(parse_mail_addrs(y)?), + "to" => mh.to = parse_mail_addrs(y)?, + "cc" => mh.cc = parse_mail_addrs(y)?, + "bcc" => mh.bcc = Some(parse_mail_addrs(y)?), + "subject" => { + mh.subject = val; + } + "comments" => { + mh.comments.push(val); + } + "keywords" => { + mh.keywords.push(val); + } + "mime-version" => { + strip_comments(&mut val); + if val.trim() != "1.0" { + return Err(maildir::MailEntryError::DateError("unknown mime version")); + } + } + "content-disposition" => { + mh.mime.content_disposition = val; + } + "filename" => { + mh.mime.filename = val; + } + _ => {} + }; + + key.clear(); + } + + Ok(mh) +} + +fn parse_mail_body(pm: &ParsedMail) -> Result<MailBody, maildir::MailEntryError> { + let body = if pm.ctype.mimetype.starts_with("message/") { + MailBody::Message(Box::new( + mailparse::parse_mail(pm.get_body()?.as_ref())?.try_into()?, + )) + } else if pm.subparts.is_empty() && pm.ctype.mimetype.starts_with("text/") { + let b = pm.get_body()?; + MailBody::Discrete(b) + } else if pm.subparts.is_empty() { + let b = match pm.get_body_encoded() { + Body::Base64(eb) => { + let db = eb.get_raw(); + if db.len() < 512 * 1024 { + String::from_utf8_lossy(db).into_owned() + } else { + String::new() + } + } + Body::SevenBit(eb) => eb.get_as_string()?, + _ => todo!(), + }; + MailBody::Discrete(b) + } else { + MailBody::Multipart { + preamble: String::new(), + parts: pm + .subparts + .iter() + .map(|part| { + Ok(MIMEPart { + head: parse_mail_content(part)?, + body: parse_mail_body(part)?, + }) + }) + .filter_map(|p: Result<MIMEPart, maildir::MailEntryError>| p.ok()) + .collect(), + epilogue: String::new(), + } + }; + Ok(body) +} + +enum FindMatchParen { + Open, + Close, +} + +impl FindMatchParen { + fn value(&self) -> char { + match self { + FindMatchParen::Open => '(', + FindMatchParen::Close => ')', + } + } + + fn len(&self) -> usize { + 1 + } + + fn of_char(c: char) -> Option<Self> { + match c { + '(' => Some(FindMatchParen::Open), + ')' => Some(FindMatchParen::Close), + _ => None, + } + } +} + +fn find_in_header(s: &str, f: FindMatchParen) -> Option<usize> { + let mut in_q = false; + let mut q_pair = false; + let mut open_p = 0; + + for (i, c) in s.char_indices() { + if q_pair { + q_pair = false; + continue; + } + match c { + '\\' => { + q_pair = true; + } + '"' => { + in_q = !in_q; + } + _ if !in_q => { + if open_p == 0 { + if c == f.value() { + return Some(i); + } + if c == FindMatchParen::Open.value() { + open_p += 1; + } + } else { + match FindMatchParen::of_char(c) { + Some(FindMatchParen::Open) => open_p += 1, + Some(FindMatchParen::Close) => open_p -= 1, + None => {} + } + } + } + _ => {} + }; + } + None +} + +fn find_pair(offset: usize, s: &str) -> Option<std::ops::Range<usize>> { + if let Some(open) = find_in_header(s, FindMatchParen::Open) { + if let Some(mut close) = find_in_header( + &s[open + FindMatchParen::Open.len()..], + FindMatchParen::Close, + ) { + close += open + FindMatchParen::Open.len(); + Some(offset + open..offset + close + FindMatchParen::Close.len()) + } else { + find_pair( + offset + open + FindMatchParen::Open.len(), + &s[open + FindMatchParen::Open.len()..], + ) + } + } else { + None + } +} + +fn strip_comments(s: &mut String) { + let mut off = 0; + loop { + if let Some(r) = find_pair(off, &s[off..]) { + s.drain(r.clone()); + off = r.start; + } else { + break; + } + } +} + +impl TryFrom<ParsedMail<'_>> for Mail { + type Error = maildir::MailEntryError; + + fn try_from(m: ParsedMail) -> Result<Self, Self::Error> { + let head = parse_mail_header(&m)?; + let body = parse_mail_body(&m)?; + + Ok(Mail { head, body }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn comment() { + let mut x = r#"(this is ((some) text)) a "some text with (comment \" in) quotes)(" (example) \( included) (xx)b()"#.to_owned(); + strip_comments(&mut x); + assert_eq!( + &x, + r#" a "some text with (comment \" in) quotes)(" \( included) b"# + ); + } + + #[test] + fn unclosed_comment() { + let mut x = "(this is (some text) example b".to_owned(); + strip_comments(&mut x); + assert_eq!(&x, "(this is example b"); + } + + #[test] + fn find_first_pair() { + let mut r = find_pair(0, "abc def"); + assert_eq!(r, None); + + r = find_pair(0, "abc ( def"); + assert_eq!(r, None); + + r = find_pair(0, "abc ) def"); + assert_eq!(r, None); + + let s = "(abc) def"; + if let Some(i) = find_pair(0, s) { + assert_eq!(i, 0..5); + assert_eq!(&s[i], "(abc)"); + } else { + assert!(false, "Got None expected Some!"); + } + + let s = "abc (def) ghi"; + if let Some(i) = find_pair(0, s) { + assert_eq!(i, 4..9); + assert_eq!(&s[i], "(def)"); + } else { + assert!(false, "Got None expected Some!"); + } + + let s = "(abc (def) ghi"; + if let Some(i) = find_pair(0, s) { + assert_eq!(i, 5..10); + assert_eq!(&s[i], "(def)"); + } else { + assert!(false, "Got None expected Some!"); + } + + let s = "abc ((def) ghi)"; + if let Some(i) = find_pair(0, s) { + assert_eq!(i, 4..15); + assert_eq!(&s[i], "((def) ghi)"); + } else { + assert!(false, "Got None expected Some!"); + } + + let s = r#" a "some text with (comment \" in) quotes)(" (example)"#; + if let Some(i) = find_pair(0, s) { + assert_eq!(i, 45..54); + assert_eq!(&s[i], "(example)"); + } else { + assert!(false, "Got None expected Some!"); + } + } +} |