binc/doxygen/mime-parsefull_8cc_source.html

#include "mime.h"

#include "mime-utils.h"

#include "mime-inputsource.h"

#include "convert.h"

#include <string>

#include <vector>

#include <map>

#include <exception>

#include <iostream>


#include <string.h>

#include <ctype.h>

#include <stdio.h>

#include <errno.h>


// Input source read by every parsing routine in this file. It has external
// linkage and starts out null; MimeDocument::parseFull() allocates it with
// new (or resets it when it already wraps the requested file descriptor)
// and deletes the previous instance when switching descriptors.
Binc::MimeInputSource *mimeSource = 0;


using namespace ::std;


//------------------------------------------------------------------------

/**
 * Parse the complete MIME document readable from file descriptor fd.
 *
 * The work is done at most once per document: allIsParsed is set before
 * parsing starts and later calls return immediately.  The global
 * mimeSource is re-used (after reset()) when it already wraps fd,
 * otherwise it is replaced by a new MimeInputSource for fd.
 *
 * After the part tree has been parsed, any remaining input is drained so
 * that size reflects the offset at the very end of the input.
 *
 * NOTE(review): this method is const yet assigns to members, so those
 * members are presumably declared mutable - confirm in mime.h.
 *
 * @param fd file descriptor of the message to parse.
 */
void Binc::MimeDocument::parseFull(int fd) const
{
  if (allIsParsed)
    return;

  allIsParsed = true;

  if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
    delete mimeSource;
    // Clear the global before allocating: if the constructor below throws,
    // mimeSource must not be left pointing at the object just deleted.
    mimeSource = 0;
    mimeSource = new MimeInputSource(fd);
  } else {
    mimeSource->reset();
  }

  // Start from a clean slate before the recursive parse fills these in.
  headerstartoffsetcrlf = 0;
  headerlength = 0;
  bodystartoffsetcrlf = 0;
  bodylength = 0;
  size = 0;
  messagerfc822 = false;
  multipart = false;

  // The top-level part has no enclosing boundary, hence the empty string.
  int bsize = 0;
  string bound;
  MimePart::parseFull(bound, bsize);

  // Eat any trailing junk to get the correct size.
  char c;
  while (mimeSource->getChar(&c)) {
    // nothing to do, only the resulting offset matters
  }

  size = mimeSource->getOffset();
}


//------------------------------------------------------------------------

/**
 * Read one header field (name, ':' and possibly folded content) from
 * mimeSource and store it in header.
 *
 * @param header  receives the field through Header::add().
 * @param nlines  incremented for every '\n' consumed while reading the
 *                field content.
 * @return true when a field was stored and the next line looks like
 *         another field; false when the header section is finished (blank
 *         line, body start or end of input).
 */
static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
{
  using namespace ::Binc;

  string fieldName;
  string fieldBody;
  char ch;

  // Phase 1: accumulate the field name up to the first ':'.
  for (;;) {
    if (!mimeSource->getChar(&ch))
      break;

    if (ch == '\r') {
      // A '\r' before any ':' means this is not a header line.  Push back
      // the '\r' and everything collected so far, and treat the position
      // as the start of the body.
      for (int remaining = (int) fieldName.length() + 1; remaining > 0;
           --remaining)
        mimeSource->ungetChar();
      return false;
    }

    if (ch == ':')
      break;

    fieldName += ch;
  }

  // Phase 2: read the content.  window always holds the last four
  // characters read, newest in window[3].
  char window[4] = { '\0', '\0', '\0', '\0' };

  for (;;) {
    if (!mimeSource->getChar(&ch))
      break;                            // end of input

    if (ch == '\n')
      ++*nlines;

    window[0] = window[1];
    window[1] = window[2];
    window[2] = window[3];
    window[3] = ch;

    if (strncmp(window, "\r\n\r\n", 4) == 0)
      break;                            // blank line: end of all headers

    const bool afterNewline = (window[2] == '\n');
    const bool isFolding = (ch == ' ' || ch == '\t');
    if (!afterNewline || isFolding) {
      // Still inside the current field (possibly a folded continuation).
      fieldBody += ch;
      continue;
    }

    // A new line starts with a non-whitespace character: the current
    // field is complete.  Drop the two trailing line-ending characters
    // and store it.
    if (fieldBody.length() > 2)
      fieldBody.resize(fieldBody.length() - 2);

    trim(fieldBody);
    header->add(fieldName, fieldBody);

    if (ch == '\r') {
      // The new line starts with '\r': consume one more character and
      // report that the header section is over.
      mimeSource->getChar(&ch);
      return false;
    }

    // The character belongs to the next field: give it back, and undo
    // the line count if it was a '\n'.
    mimeSource->ungetChar();
    if (ch == '\n')
      --*nlines;
    return true;
  }

  // Reached only on end of input or on the CRLF CRLF terminator; store
  // whatever was collected.  No further header lines follow in either case.
  if (fieldName != "") {
    if (fieldBody.length() > 2)
      fieldBody.resize(fieldBody.length() - 2);
    header->add(fieldName, fieldBody);
  }

  return false;
}


//------------------------------------------------------------------------

/**
 * Read header fields into header until parseOneHeaderLine() reports that
 * no further field follows.
 *
 * @param header  destination for the parsed fields.
 * @param nlines  line counter forwarded to parseOneHeaderLine().
 */
static void parseHeader(Binc::Header *header, unsigned int *nlines)
{
  bool moreFields;
  do {
    moreFields = parseOneHeaderLine(header, nlines);
  } while (moreFields);
}


//------------------------------------------------------------------------

/**
 * Inspect the first "content-type" header to classify a MIME part.
 *
 * Nothing is written when the header has no content-type field.
 *
 * @param header         parsed header of the part.
 * @param multipart      set to true when the major type is "multipart".
 * @param messagerfc822  set to true when the type is "message/rfc822".
 * @param subtype        receives the lowercased subtype, for multipart
 *                       types only.
 * @param boundary       receives the value of the "boundary" parameter
 *                       with surrounding spaces and double quotes removed.
 */
static void analyzeHeader(Binc::Header *header, bool *multipart,
                          bool *messagerfc822, string *subtype, string *boundary)
{
  using namespace ::Binc;

  HeaderItem contentType;
  if (!header->getFirstHeader("content-type", contentType))
    return;

  // Split "type/subtype; param=value; ..." on ';'.
  vector<string> fields;
  split(contentType.getValue(), ";", fields);

  if (!fields.empty()) {
    // The first element should describe the content type.
    string mediaType = fields[0];
    trim(mediaType);

    vector<string> pieces;
    split(mediaType, "/", pieces);

    // Missing pieces default to text/plain.
    string major("text");
    string minor("plain");
    if (pieces.size() > 0)
      major = pieces[0];
    if (pieces.size() > 1)
      minor = pieces[1];

    lowercase(major);

    if (major == "multipart") {
      *multipart = true;
      lowercase(minor);
      *subtype = minor;
    } else if (major == "message") {
      lowercase(minor);
      if (minor == "rfc822")
        *messagerfc822 = true;
    }
  }

  // Scan every element for a "boundary=" parameter; a later occurrence
  // overwrites an earlier one.
  for (vector<string>::size_type idx = 0; idx < fields.size(); ++idx) {
    string param = fields[idx];
    trim(param);

    const string::size_type eq = param.find('=');
    if (eq == string::npos)
      continue;

    string attribute = param.substr(0, eq);
    lowercase(attribute);
    trim(attribute);
    if (attribute != "boundary")
      continue;

    string attributeValue = param.substr(eq + 1);
    trim(attributeValue, " \"");
    *boundary = attributeValue;
  }
}


/**
 * Parse the body of a message/rfc822 part, i.e. a complete MIME document
 * nested inside the current one.
 *
 * The nested document is parsed recursively with the enclosing boundary
 * passed through, and the result is appended to members.
 *
 * @param members         receives a copy of the nested part.
 * @param foundendofpart  set to true when the recursive parseFull()
 *                        returns non-zero.
 * @param bodylength      receives the number of bytes consumed, minus the
 *                        boundary size reported by the nested parse
 *                        (clamped at 0).
 * @param nbodylines      increased by the nested part's line count.
 * @param toboundary      boundary of the enclosing multipart, or empty.
 */
static void parseMessageRFC822(vector<Binc::MimePart> *members,
                               bool *foundendofpart,
                               unsigned int *bodylength,
                               unsigned int *nbodylines,
                               const string &toboundary)
{
  using namespace ::Binc;

  MimePart enclosed;

  const unsigned int startOffset = mimeSource->getOffset();

  // parseFull reports through its second argument how many bytes of the
  // terminating boundary were consumed; they are not part of the body.
  int terminatorBytes = 0;
  const int status = enclosed.parseFull(toboundary, terminatorBytes);
  if (status != 0)
    *foundendofpart = true;

  // Compute the length with explicit guards so the unsigned arithmetic
  // cannot wrap around.
  const unsigned int endOffset = mimeSource->getOffset();
  unsigned int length = 0;
  if (endOffset >= startOffset) {
    const unsigned int consumed = endOffset - startOffset;
    const unsigned int strip = (unsigned int) terminatorBytes;
    if (consumed >= strip)
      length = consumed - strip;
  }
  *bodylength = length;

  *nbodylines += enclosed.getNofLines();

  members->push_back(enclosed);
}


static bool skipUntilBoundary(const string &delimiter,

                              unsigned int *nlines, bool *eof)

{

  int endpos = delimiter.length();

  char *delimiterqueue = 0;

  int delimiterpos = 0;

  const char *delimiterStr = delimiter.c_str();

  if (delimiter != "") {

    delimiterqueue = new char[endpos];

    memset(delimiterqueue, 0, endpos);

  }


  // first, skip to the first delimiter string. Anything between the

  // header and the first delimiter string is simply ignored (it's

  // usually a text message intended for non-mime clients)

  char c;


  bool foundBoundary = false;

  for (;;) {

    if (!mimeSource->getChar(&c)) {

      *eof = true;

      break;

    }


    if (c == '\n') ++*nlines;


    // if there is no delimiter, we just read until the end of the

    // file.

    if (!delimiterqueue) continue;


    delimiterqueue[delimiterpos++ % endpos] = c;


    if (compareStringToQueue(delimiterStr, delimiterqueue,

        delimiterpos, endpos)) {

      foundBoundary = true;

      break;

    }

  }


  delete[] delimiterqueue;

  delimiterqueue = 0;


  return foundBoundary;

}


static void parseMultipart(const string &boundary,

                           const string &toboundary,

                           bool *eof,

                           unsigned int *nlines,

                           int *boundarysize,

                           bool *foundendofpart,

                           unsigned int *bodylength,

                           vector<Binc::MimePart> *members)

{

  using namespace ::Binc;

  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();


  // multipart parsing starts with skipping to the first

  // boundary. then we call parse() for all parts. the last parse()

  // command will return a code indicating that it found the last

  // boundary of this multipart. Note that the first boundary does

  // not have to start with CRLF.

  string delimiter = "--" + boundary;


  skipUntilBoundary(delimiter, nlines, eof);


  if (!eof) *boundarysize = delimiter.size();


  // Read two more characters. This may be CRLF, it may be "--" and

  // it may be any other two characters.


  char a;

  if (!mimeSource->getChar(&a)) *eof = true;

  if (a == '\n') ++*nlines;


  char b;

  if (!mimeSource->getChar(&b)) *eof = true;

  if (b == '\n') ++*nlines;


  // If we find two dashes after the boundary, then this is the end

  // of boundary marker.

  if (!*eof) {

    if (a == '-' && b == '-') {

      *foundendofpart = true;

      *boundarysize += 2;


      if (!mimeSource->getChar(&a)) *eof = true;

      if (a == '\n') ++*nlines;

      if (!mimeSource->getChar(&b)) *eof = true;

      if (b == '\n') ++*nlines;

    }


    if (a == '\r' && b == '\n') {

      // This exception is to handle a special case where the

      // delimiter of one part is not followed by CRLF, but

      // immediately followed by a CRLF prefixed delimiter.

      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))

        *eof = true;

      else if (a == '-' && b == '-') {

        mimeSource->ungetChar();

        mimeSource->ungetChar();

        mimeSource->ungetChar();

        mimeSource->ungetChar();

      } else {

        mimeSource->ungetChar();

        mimeSource->ungetChar();

      }


      *boundarysize += 2;

    } else {

      mimeSource->ungetChar();

      mimeSource->ungetChar();

    }

  }


  // read all mime parts.

  if (!*foundendofpart && !*eof) {

    bool quit = false;

    do {

      MimePart m;


      // If parseFull returns != 0, then it encountered the multipart's

      // final boundary.

      int bsize = 0;

      if (m.parseFull(boundary, bsize)) {

        quit = true;

        *boundarysize = bsize;

      }


      members->push_back(m);


    } while (!quit);

  }


  if (!*foundendofpart && !*eof) {

    // multipart parsing starts with skipping to the first

    // boundary. then we call parse() for all parts. the last parse()

    // command will return a code indicating that it found the last

    // boundary of this multipart. Note that the first boundary does

    // not have to start with CRLF.

    string delimiter = "\r\n--" + toboundary;


    skipUntilBoundary(delimiter, nlines, eof);


    if (!*eof) *boundarysize = delimiter.size();


    // Read two more characters. This may be CRLF, it may be "--" and

    // it may be any other two characters.


    char a = '\0';

    if (!mimeSource->getChar(&a)) *eof = true;

    if (a == '\n') ++*nlines;


    char b = '\0';

    if (!mimeSource->getChar(&b)) *eof = true;

    if (b == '\n') ++*nlines;


    // If we find two dashes after the boundary, then this is the end

    // of boundary marker.

    if (!*eof) {

      if (a == '-' && b == '-') {

        *foundendofpart = true;

        *boundarysize += 2;

        if (!mimeSource->getChar(&a))

          *eof = true;

        if (a == '\n')

          ++*nlines;

        if (!mimeSource->getChar(&b))

          *eof = true;

        if (b == '\n')

          ++*nlines;

      }


      if (a == '\r' && b == '\n') {

    // This exception is to handle a special case where the

    // delimiter of one part is not followed by CRLF, but

    // immediately followed by a CRLF prefixed delimiter.

        if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))

          *eof = true;

        else if (a == '-' && b == '-') {

          mimeSource->ungetChar();

          mimeSource->ungetChar();

          mimeSource->ungetChar();

          mimeSource->ungetChar();

        } else {

          mimeSource->ungetChar();

          mimeSource->ungetChar();

        }


        *boundarysize += 2;

      } else {

        mimeSource->ungetChar();

        mimeSource->ungetChar();

      }

    }

  }


  // make sure bodylength doesn't overflow

  *bodylength = mimeSource->getOffset();

  if (*bodylength >= bodystartoffsetcrlf) {

    *bodylength -= bodystartoffsetcrlf;

    if (*bodylength >= (unsigned int) *boundarysize) {

      *bodylength -= (unsigned int) *boundarysize;

    } else {

      *bodylength = 0;

    }

  } else {

    *bodylength = 0;

  }

}


static void parseSinglePart(const string &toboundary,

                           int *boundarysize,

                           unsigned int *nbodylines,

                           unsigned int *nlines,

                           bool *eof, bool *foundendofpart,

                           unsigned int *bodylength)

{

  using namespace ::Binc;

  unsigned int bodystartoffsetcrlf = mimeSource->getOffset();


  // If toboundary is empty, then we read until the end of the

  // file. Otherwise we will read until we encounter toboundary.

  string _toboundary;

  if (toboundary != "") {

    _toboundary = "\r\n--";

    _toboundary += toboundary;

  }


  //  if (skipUntilBoundary(_toboundary, nlines, eof))

  //    *boundarysize = _toboundary.length();


  char *boundaryqueue = 0;

  int endpos = _toboundary.length();

  if (toboundary != "") {

    boundaryqueue = new char[endpos];

    memset(boundaryqueue, 0, endpos);

  }

  int boundarypos = 0;


  *boundarysize = 0;


  const char *_toboundaryStr = _toboundary.c_str();

  string line;

  bool toboundaryIsEmpty = (toboundary == "");

  char c;

  while (mimeSource->getChar(&c)) {

    if (c == '\n') { ++*nbodylines; ++*nlines; }

    if (toboundaryIsEmpty) continue;


    // find boundary

    boundaryqueue[boundarypos++ % endpos] = c;


    if (compareStringToQueue(_toboundaryStr, boundaryqueue,

        boundarypos, endpos)) {

      *boundarysize = _toboundary.length();

      break;

    }

  }


  delete[] boundaryqueue;


  if (toboundary != "") {


    char a;

    if (!mimeSource->getChar(&a)) *eof = true;

    if (a == '\n') ++*nlines;


    char b;

    if (!mimeSource->getChar(&b)) *eof = true;

    if (b == '\n') ++*nlines;


    if (a == '-' && b == '-') {

      *boundarysize += 2;

      *foundendofpart = true;

      if (!mimeSource->getChar(&a)) *eof = true;

      if (a == '\n') ++*nlines;

      if (!mimeSource->getChar(&b)) *eof = true;

      if (b == '\n') ++*nlines;

    }


    if (a == '\r' && b == '\n') {

      // This exception is to handle a special case where the

      // delimiter of one part is not followed by CRLF, but

      // immediately followed by a CRLF prefixed delimiter.

      if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))

         *eof = true;

      else if (a == '-' && b == '-') {

         mimeSource->ungetChar();

         mimeSource->ungetChar();

         mimeSource->ungetChar();

         mimeSource->ungetChar();

      } else {

        mimeSource->ungetChar();

        mimeSource->ungetChar();

      }


      *boundarysize += 2;

    } else {

      mimeSource->ungetChar();

      mimeSource->ungetChar();

    }

  }


  // make sure bodylength doesn't overflow

  *bodylength = mimeSource->getOffset();

  if (*bodylength >= bodystartoffsetcrlf) {

    *bodylength -= bodystartoffsetcrlf;

    if (*bodylength >= (unsigned int) *boundarysize) {

      *bodylength -= (unsigned int) *boundarysize;

    } else {

      *bodylength = 0;

    }

  } else {

    *bodylength = 0;

  }


}


//------------------------------------------------------------------------

/**
 * Parse one MIME part (header and body) starting at the current position
 * of mimeSource, recursing into nested parts as required.
 *
 * @param toboundary    boundary of the enclosing multipart; empty when
 *                      there is none.
 * @param boundarysize  passed on to the multipart / single-part body
 *                      parsers, which store the size of the boundary text
 *                      they consumed; not written for message/rfc822.
 * @return 1 when end of input or a closing delimiter was encountered,
 *         0 otherwise.
 */
int Binc::MimePart::parseFull(const string &toboundary,
                              int &boundarysize) const
{
  headerstartoffsetcrlf = mimeSource->getOffset();

  // Parse the header of this mime part.
  parseHeader(&h, &nlines);

  // The header length includes the separating CRLF; the body starts
  // right after it.
  const unsigned int afterHeader = mimeSource->getOffset();
  headerlength = afterHeader - headerstartoffsetcrlf;
  bodystartoffsetcrlf = afterHeader;
  bodylength = 0;

  // Classify the part from its header fields.
  analyzeHeader(&h, &multipart, &messagerfc822, &subtype, &boundary);

  bool reachedEof = false;
  bool sawClosingDelimiter = false;

  if (messagerfc822) {
    parseMessageRFC822(&members, &sawClosingDelimiter, &bodylength,
                       &nbodylines, toboundary);
  } else if (multipart) {
    parseMultipart(boundary, toboundary, &reachedEof, &nlines, &boundarysize,
                   &sawClosingDelimiter, &bodylength, &members);
  } else {
    parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
                    &reachedEof, &sawClosingDelimiter, &bodylength);
  }

  if (reachedEof || sawClosingDelimiter)
    return 1;

  return 0;
}

Binc::Header
Definition: mime.h:31

Binc::Header::add
void add(const std::string &name, const std::string &content)
Definition: mime.cc:131

Binc::Header::getFirstHeader
bool getFirstHeader(const std::string &key, HeaderItem &dest) const
Definition: mime.cc:89

Binc::MimeDocument::parseFull
void parseFull(int fd) const
Definition: mime-parsefull.cc:27

Binc::MimeInputSource
Definition: mime-inputsource.h:15

Binc::MimeInputSource::reset
virtual void reset(void)
Definition: mime-inputsource.h:90

Binc::MimeInputSource::getFileDescriptor
int getFileDescriptor(void) const
Definition: mime-inputsource.h:128

Binc::MimeInputSource::getOffset
unsigned int getOffset(void) const
Definition: mime-inputsource.h:133

Binc::MimeInputSource::getChar
bool getChar(char *c)
Definition: mime-inputsource.h:112

Binc::MimeInputSource::ungetChar
void ungetChar(void)
Definition: mime-inputsource.h:122

Binc::MimePart::parseFull
virtual int parseFull(const std::string &toboundary, int &boundarysize) const
Definition: mime-parsefull.cc:569

Binc::MimePart::headerstartoffsetcrlf
unsigned int headerstartoffsetcrlf
Definition: mime.h:57

Binc::MimePart::multipart
bool multipart
Definition: mime.h:52

Binc::MimePart::messagerfc822
bool messagerfc822
Definition: mime.h:53

Binc::MimePart::bodylength
unsigned int bodylength
Definition: mime.h:61

Binc::MimePart::bodystartoffsetcrlf
unsigned int bodystartoffsetcrlf
Definition: mime.h:60

Binc::MimePart::size
unsigned int size
Definition: mime.h:64

Binc::MimePart::headerlength
unsigned int headerlength
Definition: mime.h:58

convert.h
Declaration of miscellaneous conversion functions.

mime-inputsource.h
The base class of the MIME input source.

mimeSource
Binc::MimeInputSource * mimeSource
Definition: mime-parsefull.cc:22

mimeSource
Binc::MimeInputSource * mimeSource
Definition: mime-parsefull.cc:22

mime-utils.h

compareStringToQueue
bool compareStringToQueue(const char *s_in, char *bqueue, int pos, int size)
Definition: mime-utils.h:17

mime.h
Declaration of main mime parser components.

Binc
Definition: bincimapd.cc:9

Binc::split
void split(const std::string &s_in, const std::string &delim, std::vector< std::string > &dest, bool skipempty=true)
Definition: convert.h:177

Binc::lowercase
void lowercase(std::string &input)
Definition: convert.h:122

Binc::trim
void trim(std::string &s_in, const std::string &chars=" \t\r\n")
Definition: convert.h:137