Bincimap 2.0.16
Easy Imapping
Loading...
Searching...
No Matches
mime-parsefull.cc
Go to the documentation of this file.
1
7#include "mime.h"
8#include "mime-utils.h"
9#include "mime-inputsource.h"
10#include "convert.h"
11#include <string>
12#include <vector>
13#include <map>
14#include <exception>
15#include <iostream>
16
17#include <string.h>
18#include <ctype.h>
19#include <stdio.h>
20#include <errno.h>
21
23
24using namespace ::std;
25
26//------------------------------------------------------------------------
28{
// NOTE(review): this is a body without its signature. The embedded
// listing numbering starts at 28 here and also skips 36, 38, 41, 43
// and 57, so several statements of this function are not visible in
// this extract. Comments below describe only what is visible.

// Parse at most once: a call made after allIsParsed has been set
// returns immediately.
29 if (allIsParsed)
30 return;
31
32 allIsParsed = true;
33
// When there is no input source yet, or the existing one reports a
// different file descriptor than fd, the old source is deleted.
// NOTE(review): whatever followed the delete (listing line 36) and the
// whole else branch (listing line 38) are missing from this extract.
34 if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
35 delete mimeSource;
37 } else {
39 }
40
// Reset the cached measurements and type flags before parsing.
42 headerlength = 0;
44 bodylength = 0;
45 size = 0;
46 messagerfc822 = false;
47 multipart = false;
48
// Parse the document as one part, passing an empty boundary string.
// The boundary size reported back through bsize is not used afterwards
// in the visible code.
49 int bsize = 0;
50 string bound;
51 MimePart::parseFull(bound, bsize);
52
53 // eat any trailing junk to get the correct size
// The loop body is intentionally empty: getChar() is simply called
// until it reports failure.
54 char c;
55 while (mimeSource->getChar(&c));
56
58}
59
60//------------------------------------------------------------------------
// Read one header field (name, ':', value including any continuation
// lines that start with space or tab) from the global mimeSource and
// store it in *header.
//
// header - receives the (name, content) pair via Header::add().
// nlines - incremented for every '\n' consumed while reading the value.
//
// Returns true when a field was stored and the next character does not
// start the blank line that ends the headers, i.e. the caller should
// call again. Returns false when a '\r' is seen before any ':', when
// the field just stored is followed by '\r', when the "\r\n\r\n"
// sequence is seen, or when the input runs out.
//
// NOTE(review): listing lines 76 and 122 are missing from this extract
// (see the notes at those places); confirm against the original file.
61static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
62{
63 using namespace ::Binc;
64 char c;
65 bool eof = false;
// Sliding window over the last four characters read from the value.
66 char cqueue[4];
67 string name;
68 string content;
69
// Phase 1: accumulate the field name up to the first ':'.
70 while (mimeSource->getChar(&c)) {
71 // If we encounter a \r before we got to the first ':', then
72 // rewind back to the start of the line and assume we're at the
73 // start of the body.
74 if (c == '\r') {
// NOTE(review): the body of this for loop (listing line 76) is not
// present in this extract. The loop runs name.length() + 1 times,
// which matches the characters consumed so far on this line.
75 for (int i = 0; i < (int) name.length() + 1; ++i)
77 return false;
78 }
79
80 // A colon marks the end of the header name
81 if (c == ':') break;
82
83 // Otherwise add to the header name
84 name += c;
85 }
86
// Start with an empty window so stale stack contents can never match.
87 cqueue[0] = '\0';
88 cqueue[1] = '\0';
89 cqueue[2] = '\0';
90 cqueue[3] = '\0';
91
92 // Read until the end of the header.
93 bool endOfHeaders = false;
94 while (!endOfHeaders) {
95 if (!mimeSource->getChar(&c)) {
96 eof = true;
97 break;
98 }
99
100 if (c == '\n') ++*nlines;
101
// Shift the window left by one and append the new character.
102 for (int i = 0; i < 3; ++i)
103 cqueue[i] = cqueue[i + 1];
104 cqueue[3] = c;
105
// CRLF CRLF: the blank line that terminates the header block.
106 if (strncmp(cqueue, "\r\n\r\n", 4) == 0) {
107 endOfHeaders = true;
108 break;
109 }
110
111 // If the last character was a newline, and the first now is not
112 // whitespace, then rewind one character and store the current
113 // key,value pair.
114 if (cqueue[2] == '\n' && c != ' ' && c != '\t') {
// Drop the last two characters collected (the line terminator that
// preceded c) before trimming and storing.
115 if (content.length() > 2)
116 content.resize(content.length() - 2);
117
118 trim(content);
119 header->add(name, content);
120
121 if (c != '\r') {
// NOTE(review): listing line 122 is missing here. The comment above
// says one character is rewound at this point, and the decrement
// below undoes the nlines increment for a '\n' that was counted.
123 if (c == '\n') --*nlines;
124 return true;
125 }
126
// c is '\r': one more character is consumed and false is returned, so
// the caller stops reading header lines.
127 mimeSource->getChar(&c);
128 return false;
129 }
130
131 content += c;
132 }
133
// Reached only through one of the two breaks above (end of input or
// end of headers): store whatever field was in progress.
// NOTE(review): unlike the path above, content is not passed through
// trim() here.
134 if (name != "") {
135 if (content.length() > 2)
136 content.resize(content.length() - 2);
137 header->add(name, content);
138 }
139
// Either eof or endOfHeaders is set whenever control gets here, so
// this evaluates to false.
140 return !(eof || endOfHeaders);
141}
142
143//------------------------------------------------------------------------
144static void parseHeader(Binc::Header *header, unsigned int *nlines)
145{
146 while (parseOneHeaderLine(header, nlines))
147 { }
148}
149
150//------------------------------------------------------------------------
151static void analyzeHeader(Binc::Header *header, bool *multipart,
152 bool *messagerfc822, string *subtype, string *boundary)
153{
154 using namespace ::Binc;
155
156 // Do simple parsing of headers to determine the
157 // type of message (multipart,messagerfc822 etc)
158 HeaderItem ctype;
159 if (header->getFirstHeader("content-type", ctype)) {
160 vector<string> types;
161 split(ctype.getValue(), ";", types);
162
163 if (types.size() > 0) {
164 // first element should describe content type
165 string tmp = types[0];
166 trim(tmp);
167 vector<string> v;
168 split(tmp, "/", v);
169 string key, value;
170
171 key = (v.size() > 0) ? v[0] : "text";
172 value = (v.size() > 1) ? v[1] : "plain";
173 lowercase(key);
174
175 if (key == "multipart") {
176 *multipart = true;
177 lowercase(value);
178 *subtype = value;
179 } else if (key == "message") {
180 lowercase(value);
181 if (value == "rfc822")
182 *messagerfc822 = true;
183 }
184 }
185
186 for (vector<string>::const_iterator i = types.begin();
187 i != types.end(); ++i) {
188 string element = *i;
189 trim(element);
190
191 if (element.find("=") != string::npos) {
192 string::size_type pos = element.find('=');
193 string key = element.substr(0, pos);
194 string value = element.substr(pos + 1);
195
196 lowercase(key);
197 trim(key);
198
199 if (key == "boundary") {
200 trim(value, " \"");
201 *boundary = value;
202 }
203 }
204 }
205 }
206}
207
208static void parseMessageRFC822(vector<Binc::MimePart> *members,
209 bool *foundendofpart,
210 unsigned int *bodylength,
211 unsigned int *nbodylines,
212 const string &toboundary)
213{
214 using namespace ::Binc;
215
216 // message rfc822 means a completely enclosed mime document. we
217 // call the parser recursively, and pass on the boundary string
218 // that we got. when parse() finds this boundary, it returns 0. if
219 // it finds the end boundary (boundary + "--"), it returns != 0.
220 MimePart m;
221
222 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
223
224 // parsefull returns the number of bytes that need to be removed
225 // from the body because of the terminating boundary string.
226 int bsize = 0;
227 if (m.parseFull(toboundary, bsize))
228 *foundendofpart = true;
229
230 // make sure bodylength doesn't overflow
231 *bodylength = mimeSource->getOffset();
232 if (*bodylength >= bodystartoffsetcrlf) {
233 *bodylength -= bodystartoffsetcrlf;
234 if (*bodylength >= (unsigned int) bsize) {
235 *bodylength -= (unsigned int) bsize;
236 } else {
237 *bodylength = 0;
238 }
239 } else {
240 *bodylength = 0;
241 }
242
243 *nbodylines += m.getNofLines();
244
245 members->push_back(m);
246}
247
248static bool skipUntilBoundary(const string &delimiter,
249 unsigned int *nlines, bool *eof)
250{
251 int endpos = delimiter.length();
252 char *delimiterqueue = 0;
253 int delimiterpos = 0;
254 const char *delimiterStr = delimiter.c_str();
255 if (delimiter != "") {
256 delimiterqueue = new char[endpos];
257 memset(delimiterqueue, 0, endpos);
258 }
259
260 // first, skip to the first delimiter string. Anything between the
261 // header and the first delimiter string is simply ignored (it's
262 // usually a text message intended for non-mime clients)
263 char c;
264
265 bool foundBoundary = false;
266 for (;;) {
267 if (!mimeSource->getChar(&c)) {
268 *eof = true;
269 break;
270 }
271
272 if (c == '\n') ++*nlines;
273
274 // if there is no delimiter, we just read until the end of the
275 // file.
276 if (!delimiterqueue) continue;
277
278 delimiterqueue[delimiterpos++ % endpos] = c;
279
280 if (compareStringToQueue(delimiterStr, delimiterqueue,
281 delimiterpos, endpos)) {
282 foundBoundary = true;
283 break;
284 }
285 }
286
287 delete[] delimiterqueue;
288 delimiterqueue = 0;
289
290 return foundBoundary;
291}
292
293
// Parse the body of a multipart part from the global mimeSource.
//
// boundary       - this multipart's own boundary string; its parts are
//                  delimited by "--" + boundary.
// toboundary     - boundary passed in by the caller; after the parts
//                  have been read the input is skipped up to
//                  "\r\n--" + toboundary.
// eof            - set to true when the input runs out.
// nlines         - incremented for every '\n' consumed here.
// boundarysize   - updated with the size of the delimiter text that was
//                  consumed; subtracted from the body length at the end.
// foundendofpart - set to true when a delimiter is followed by "--".
// bodylength     - receives (offset at exit - offset at entry -
//                  *boundarysize), or 0 if that would wrap.
// members        - every part parsed is appended here.
//
// NOTE(review): listing lines 348-351, 353-354, 359-360, 429-432,
// 434-435 and 440-441 are missing from this extract, so several
// branches below appear empty. Confirm against the original file.
294static void parseMultipart(const string &boundary,
295 const string &toboundary,
296 bool *eof,
297 unsigned int *nlines,
298 int *boundarysize,
299 bool *foundendofpart,
300 unsigned int *bodylength,
301 vector<Binc::MimePart> *members)
302{
303 using namespace ::Binc;
// Offset at which this body starts; used for the length computation at
// the end.
304 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
305
306 // multipart parsing starts with skipping to the first
307 // boundary. then we call parse() for all parts. the last parse()
308 // command will return a code indicating that it found the last
309 // boundary of this multipart. Note that the first boundary does
310 // not have to start with CRLF.
311 string delimiter = "--" + boundary;
312
// The return value (whether the delimiter was found) is not used; only
// *eof is consulted afterwards.
313 skipUntilBoundary(delimiter, nlines, eof);
314
// NOTE(review): eof is a bool*, so !eof tests the pointer rather than
// the flag and is false for any non-null pointer; *boundarysize is
// therefore not assigned here. The matching statement further down
// (listing line 393) uses !*eof.
315 if (!eof) *boundarysize = delimiter.size();
316
317 // Read two more characters. This may be CRLF, it may be "--" and
318 // it may be any other two characters.
319
// NOTE(review): a and b are not initialised. If getChar() fails without
// writing to them, the comparisons against '\n' read indeterminate
// values. The second copy of this code below initialises both to '\0'.
320 char a;
321 if (!mimeSource->getChar(&a)) *eof = true;
322 if (a == '\n') ++*nlines;
323
324 char b;
325 if (!mimeSource->getChar(&b)) *eof = true;
326 if (b == '\n') ++*nlines;
327
328 // If we find two dashes after the boundary, then this is the end
329 // of boundary marker.
330 if (!*eof) {
331 if (a == '-' && b == '-') {
332 *foundendofpart = true;
333 *boundarysize += 2;
334
// Read the two characters that follow the closing "--".
335 if (!mimeSource->getChar(&a)) *eof = true;
336 if (a == '\n') ++*nlines;
337 if (!mimeSource->getChar(&b)) *eof = true;
338 if (b == '\n') ++*nlines;
339 }
340
341 if (a == '\r' && b == '\n') {
342 // This exception is to handle a special case where the
343 // delimiter of one part is not followed by CRLF, but
344 // immediately followed by a CRLF prefixed delimiter.
345 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
346 *eof = true;
// NOTE(review): the bodies of the next two branches (listing lines
// 348-351 and 353-354) are missing from this extract.
347 else if (a == '-' && b == '-') {
352 } else {
355 }
356
// Account for the CRLF that followed the delimiter.
357 *boundarysize += 2;
// NOTE(review): the body of this else branch (listing lines 359-360) is
// missing from this extract.
358 } else {
361 }
362 }
363
364 // read all mime parts.
365 if (!*foundendofpart && !*eof) {
366 bool quit = false;
367 do {
368 MimePart m;
369
370 // If parseFull returns != 0, then it encountered the multipart's
371 // final boundary.
372 int bsize = 0;
373 if (m.parseFull(boundary, bsize)) {
374 quit = true;
// The boundary size reported by the last part replaces the running
// value.
375 *boundarysize = bsize;
376 }
377
// Every part is kept, including the one whose parse ended the loop.
378 members->push_back(m);
379
380 } while (!quit);
381 }
382
// Second stage: skip ahead to the caller's boundary. The comment below
// is a copy of the one at the top of the function; the delimiter built
// here is "\r\n--" + toboundary.
383 if (!*foundendofpart && !*eof) {
384 // multipart parsing starts with skipping to the first
385 // boundary. then we call parse() for all parts. the last parse()
386 // command will return a code indicating that it found the last
387 // boundary of this multipart. Note that the first boundary does
388 // not have to start with CRLF.
389 string delimiter = "\r\n--" + toboundary;
390
391 skipUntilBoundary(delimiter, nlines, eof);
392
393 if (!*eof) *boundarysize = delimiter.size();
394
395 // Read two more characters. This may be CRLF, it may be "--" and
396 // it may be any other two characters.
397
398 char a = '\0';
399 if (!mimeSource->getChar(&a)) *eof = true;
400 if (a == '\n') ++*nlines;
401
402 char b = '\0';
403 if (!mimeSource->getChar(&b)) *eof = true;
404 if (b == '\n') ++*nlines;
405
406 // If we find two dashes after the boundary, then this is the end
407 // of boundary marker.
408 if (!*eof) {
409 if (a == '-' && b == '-') {
410 *foundendofpart = true;
411 *boundarysize += 2;
412 if (!mimeSource->getChar(&a))
413 *eof = true;
414 if (a == '\n')
415 ++*nlines;
416 if (!mimeSource->getChar(&b))
417 *eof = true;
418 if (b == '\n')
419 ++*nlines;
420 }
421
422 if (a == '\r' && b == '\n') {
423 // This exception is to handle a special case where the
424 // delimiter of one part is not followed by CRLF, but
425 // immediately followed by a CRLF prefixed delimiter.
426 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
427 *eof = true;
// NOTE(review): the bodies of the next two branches (listing lines
// 429-432 and 434-435) are missing from this extract.
428 else if (a == '-' && b == '-') {
433 } else {
436 }
437
438 *boundarysize += 2;
// NOTE(review): the body of this else branch (listing lines 440-441) is
// missing from this extract.
439 } else {
442 }
443 }
444 }
445
446 // make sure bodylength doesn't overflow
// Unsigned arithmetic: each subtraction is guarded so the result is
// clamped to 0 instead of wrapping.
447 *bodylength = mimeSource->getOffset();
448 if (*bodylength >= bodystartoffsetcrlf) {
449 *bodylength -= bodystartoffsetcrlf;
450 if (*bodylength >= (unsigned int) *boundarysize) {
451 *bodylength -= (unsigned int) *boundarysize;
452 } else {
453 *bodylength = 0;
454 }
455 } else {
456 *bodylength = 0;
457 }
458}
459
// Read the body of a non-multipart, non-message/rfc822 part from the
// global mimeSource.
//
// toboundary     - boundary of the enclosing multipart. When empty the
//                  body runs to the end of the input; otherwise it runs
//                  up to "\r\n--" + toboundary.
// boundarysize   - reset to 0, then set to the size of the delimiter
//                  text consumed (plus 2 for a closing "--" and plus 2
//                  for a following CRLF).
// nbodylines     - incremented for every '\n' in the body scan.
// nlines         - incremented for every '\n' consumed here.
// eof            - set to true when reading after the boundary fails.
//                  It is not written when toboundary is empty.
// foundendofpart - set to true when the boundary is followed by "--".
// bodylength     - receives (offset at exit - offset at entry -
//                  *boundarysize), or 0 if that would wrap.
//
// NOTE(review): listing lines 537-540, 542-543 and 548-549 are missing
// from this extract, so three branches near the end appear empty.
// Confirm against the original file.
460static void parseSinglePart(const string &toboundary,
461 int *boundarysize,
462 unsigned int *nbodylines,
463 unsigned int *nlines,
464 bool *eof, bool *foundendofpart,
465 unsigned int *bodylength)
466{
467 using namespace ::Binc;
468 unsigned int bodystartoffsetcrlf = mimeSource->getOffset();
469
470 // If toboundary is empty, then we read until the end of the
471 // file. Otherwise we will read until we encounter toboundary.
472 string _toboundary;
473 if (toboundary != "") {
474 _toboundary = "\r\n--";
475 _toboundary += toboundary;
476 }
477
478 // if (skipUntilBoundary(_toboundary, nlines, eof))
479 // *boundarysize = _toboundary.length();
480
// Ring buffer holding the last endpos characters read. It is allocated
// only when there is a boundary to look for.
// NOTE(review): the buffer is owned through a raw pointer; it would not
// be released if anything between the new[] and the delete[] threw.
481 char *boundaryqueue = 0;
482 int endpos = _toboundary.length();
483 if (toboundary != "") {
484 boundaryqueue = new char[endpos];
485 memset(boundaryqueue, 0, endpos);
486 }
487 int boundarypos = 0;
488
489 *boundarysize = 0;
490
491 const char *_toboundaryStr = _toboundary.c_str();
// NOTE(review): line is never used in the visible code.
492 string line;
493 bool toboundaryIsEmpty = (toboundary == "");
494 char c;
495 while (mimeSource->getChar(&c)) {
496 if (c == '\n') { ++*nbodylines; ++*nlines; }
// Without a boundary there is nothing to match; keep reading until the
// input ends.
497 if (toboundaryIsEmpty) continue;
498
499 // find boundary
500 boundaryqueue[boundarypos++ % endpos] = c;
501
502 if (compareStringToQueue(_toboundaryStr, boundaryqueue,
503 boundarypos, endpos)) {
504 *boundarysize = _toboundary.length();
505 break;
506 }
507 }
508
// boundaryqueue is still null when no boundary was given; delete[] on a
// null pointer does nothing.
509 delete[] boundaryqueue;
510
// This runs whenever a boundary was given, whether the loop above found
// it or ran out of input.
511 if (toboundary != "") {
512
// NOTE(review): a and b are not initialised. If getChar() fails without
// writing to them, the comparisons below read indeterminate values.
513 char a;
514 if (!mimeSource->getChar(&a)) *eof = true;
515 if (a == '\n') ++*nlines;
516
517 char b;
518 if (!mimeSource->getChar(&b)) *eof = true;
519 if (b == '\n') ++*nlines;
520
// "--" directly after the boundary marks the closing delimiter of the
// enclosing multipart.
521 if (a == '-' && b == '-') {
522 *boundarysize += 2;
523 *foundendofpart = true;
524 if (!mimeSource->getChar(&a)) *eof = true;
525 if (a == '\n') ++*nlines;
526 if (!mimeSource->getChar(&b)) *eof = true;
527 if (b == '\n') ++*nlines;
528 }
529
530 if (a == '\r' && b == '\n') {
531 // This exception is to handle a special case where the
532 // delimiter of one part is not followed by CRLF, but
533 // immediately followed by a CRLF prefixed delimiter.
534 if (!mimeSource->getChar(&a) || !mimeSource->getChar(&b))
535 *eof = true;
// NOTE(review): the bodies of the next two branches (listing lines
// 537-540 and 542-543) are missing from this extract.
536 else if (a == '-' && b == '-') {
541 } else {
544 }
545
// Account for the CRLF that followed the delimiter.
546 *boundarysize += 2;
// NOTE(review): the body of this else branch (listing lines 548-549) is
// missing from this extract.
547 } else {
550 }
551 }
552
553 // make sure bodylength doesn't overflow
// Unsigned arithmetic: each subtraction is guarded so the result is
// clamped to 0 instead of wrapping.
554 *bodylength = mimeSource->getOffset();
555 if (*bodylength >= bodystartoffsetcrlf) {
556 *bodylength -= bodystartoffsetcrlf;
557 if (*bodylength >= (unsigned int) *boundarysize) {
558 *bodylength -= (unsigned int) *boundarysize;
559 } else {
560 *bodylength = 0;
561 }
562 } else {
563 *bodylength = 0;
564 }
565
566}
567
568//------------------------------------------------------------------------
569int Binc::MimePart::parseFull(const string &toboundary,
570 int &boundarysize) const
571{
572 headerstartoffsetcrlf = mimeSource->getOffset();
573
574 // Parse the header of this mime part.
575 parseHeader(&h, &nlines);
576
577 // Headerlength includes the seperating CRLF. Body starts after the
578 // CRLF.
579 headerlength = mimeSource->getOffset() - headerstartoffsetcrlf;
580 bodystartoffsetcrlf = mimeSource->getOffset();
581 bodylength = 0;
582
583 // Determine the type of mime part by looking at fields in the
584 // header.
585 analyzeHeader(&h, &multipart, &messagerfc822, &subtype, &boundary);
586
587 bool eof = false;
588 bool foundendofpart = false;
589
590 if (messagerfc822) {
591 parseMessageRFC822(&members, &foundendofpart, &bodylength,
592 &nbodylines, toboundary);
593
594 } else if (multipart) {
595 parseMultipart(boundary, toboundary, &eof, &nlines, &boundarysize,
596 &foundendofpart, &bodylength, &members);
597 } else {
598 parseSinglePart(toboundary, &boundarysize, &nbodylines, &nlines,
599 &eof, &foundendofpart, &bodylength);
600 }
601
602 return (eof || foundendofpart) ? 1 : 0;
603}
void add(const std::string &name, const std::string &content)
Definition: mime.cc:131
bool getFirstHeader(const std::string &key, HeaderItem &dest) const
Definition: mime.cc:89
void parseFull(int fd) const
virtual void reset(void)
int getFileDescriptor(void) const
unsigned int getOffset(void) const
virtual int parseFull(const std::string &toboundary, int &boundarysize) const
unsigned int headerstartoffsetcrlf
Definition: mime.h:57
bool multipart
Definition: mime.h:52
bool messagerfc822
Definition: mime.h:53
unsigned int bodylength
Definition: mime.h:61
unsigned int bodystartoffsetcrlf
Definition: mime.h:60
unsigned int size
Definition: mime.h:64
unsigned int headerlength
Definition: mime.h:58
Declaration of miscellaneous conversion functions.
The base class of the MIME input source.
Binc::MimeInputSource * mimeSource
Binc::MimeInputSource * mimeSource
bool compareStringToQueue(const char *s_in, char *bqueue, int pos, int size)
Definition: mime-utils.h:17
Declaration of main mime parser components.
Definition: bincimapd.cc:9
void split(const std::string &s_in, const std::string &delim, std::vector< std::string > &dest, bool skipempty=true)
Definition: convert.h:177
void lowercase(std::string &input)
Definition: convert.h:122
void trim(std::string &s_in, const std::string &chars=" \t\r\n")
Definition: convert.h:137