diff options
Diffstat (limited to 'src/token822.c')
-rw-r--r-- | src/token822.c | 423 |
1 files changed, 259 insertions, 164 deletions
diff --git a/src/token822.c b/src/token822.c index 239887c..e1048c6 100644 --- a/src/token822.c +++ b/src/token822.c @@ -1,10 +1,11 @@ -#include "stralloc.h" +#include "token822.h" + #include "alloc.h" #include "genalloc.h" #include "str.h" -#include "token822.h" +#include "stralloc.h" -static struct token822 comma = { TOKEN822_COMMA }; +static struct token822 comma = {TOKEN822_COMMA}; void token822_reverse(token822_alloc *ta) { @@ -17,14 +18,15 @@ void token822_reverse(token822_alloc *ta) temp = ta->t[i]; ta->t[i] = ta->t[n - i]; ta->t[n - i] = temp; - } + } } -GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready) -GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus) -GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append) +GEN_ALLOC_ready(token822_alloc, struct token822, t, len, a, i, n, x, 30, token822_ready) + GEN_ALLOC_readyplus(token822_alloc, struct token822, t, len, a, i, n, x, 30, token822_readyplus) + GEN_ALLOC_append( + token822_alloc, struct token822, t, len, a, i, n, x, 30, token822_readyplus, token822_append) -static int needspace(int t1,int t2) + static int needspace(int t1, int t2) { if (!t1) return 0; if (t1 == TOKEN822_COLON) return 1; @@ -32,12 +34,15 @@ static int needspace(int t1,int t2) if (t2 == TOKEN822_LEFT) return 1; switch (t1) { - case TOKEN822_ATOM: case TOKEN822_LITERAL: - case TOKEN822_QUOTE: case TOKEN822_COMMENT: + case TOKEN822_ATOM: + case TOKEN822_LITERAL: + case TOKEN822_QUOTE: + case TOKEN822_COMMENT: switch (t2) { - case TOKEN822_ATOM: case TOKEN822_LITERAL: - case TOKEN822_QUOTE: case TOKEN822_COMMENT: - return 1; + case TOKEN822_ATOM: + case TOKEN822_LITERAL: + case TOKEN822_QUOTE: + case TOKEN822_COMMENT: return 1; } } return 0; @@ -46,11 +51,20 @@ static int needspace(int t1,int t2) static int atomok(char ch) { switch (ch) { - case ' ': case '\t': case '\r': case '\n': - case '(': case '[': case '"': - case '<': case '>': case ';': case ':': - case '@': case ',': case '.': - return 0; + case ' ': + case '\t': + case '\r': + case '\n': + case '(': + case '[': + case '"': + case '<': + case '>': + case ';': + case ':': + case '@': + case ',': + case '.': return 0; } return 1; } @@ -59,7 +73,7 @@ static void atomcheck(struct token822 *t) { int i; char ch; - + for (i = 0; i < t->slen; ++i) { ch = t->s[i]; if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\')) { @@ -69,7 +83,7 @@ static void atomcheck(struct token822 *t) } } -int token822_unparse(stralloc *sa,token822_alloc *ta,unsigned int linelen) +int token822_unparse(stralloc *sa, token822_alloc *ta, unsigned int linelen) { struct token822 *t; int len; @@ -88,29 +102,40 @@ int token822_unparse(stralloc *sa,token822_alloc *ta,unsigned int linelen) for (i = 0; i < ta->len; ++i) { t = ta->t + i; newtype = t->type; - if (needspace(lasttype,newtype)) ++len; + if (needspace(lasttype, newtype)) ++len; lasttype = newtype; switch (newtype) { - case TOKEN822_COMMA: - len += 3; break; - case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT: - case TOKEN822_SEMI: case TOKEN822_COLON: - ++len; break; - case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: + case TOKEN822_COMMA: len += 3; break; + case TOKEN822_AT: + case TOKEN822_DOT: + case TOKEN822_LEFT: + case TOKEN822_RIGHT: + case TOKEN822_SEMI: + case TOKEN822_COLON: ++len; break; + case TOKEN822_ATOM: + case TOKEN822_QUOTE: + case TOKEN822_LITERAL: + case TOKEN822_COMMENT: if (t->type != TOKEN822_ATOM) len += 2; - for (j = 0; j < t->slen; ++j) - switch (ch = t->s[j]) { - case '"': case '[': case ']': case '(': case ')': - case '\\': case '\r': case '\n': ++len; - default: ++len; + for (j = 0; j < t->slen; ++j) switch (ch = t->s[j]) + { + case '"': + case '[': + case ']': + case '(': + case ')': + case '\\': + case '\r': + case '\n': ++len; + default: ++len; } - break; + break; } } len += 2; - if (!stralloc_ready(sa,len)) return -1; + if (!stralloc_ready(sa, len)) return -1; s = sa->s; lineb = s; @@ -121,49 +146,64 @@ int token822_unparse(stralloc *sa,token822_alloc *ta,unsigned int linelen) for (i = 0; i < ta->len; ++i) { t = ta->t + i; newtype = t->type; - if (needspace(lasttype,newtype)) *s++ = ' '; + if (needspace(lasttype, newtype)) *s++ = ' '; lasttype = newtype; switch (newtype) { - case TOKEN822_COMMA: - *s++ = ','; -#define NSUW \ - s[0] = '\n'; s[1] = ' '; \ - if (linee && (!linelen || (s - lineb <= linelen))) \ - { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \ - else { if (linee) lineb = linee + 1; linee = s; s += 2; } - NSUW - break; - case TOKEN822_AT: *s++ = '@'; break; - case TOKEN822_DOT: *s++ = '.'; break; - case TOKEN822_LEFT: *s++ = '<'; break; - case TOKEN822_RIGHT: *s++ = '>'; break; - case TOKEN822_SEMI: *s++ = ';'; break; - case TOKEN822_COLON: *s++ = ':'; break; - case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: - if (t->type == TOKEN822_QUOTE) *s++ = '"'; - if (t->type == TOKEN822_LITERAL) *s++ = '['; - if (t->type == TOKEN822_COMMENT) *s++ = '('; - - for (j = 0; j < t->slen; ++j) - switch (ch = t->s[j]) { - case '"': case '[': case ']': case '(': case ')': - case '\\': case '\r': case '\n': *s++ = '\\'; - default: *s++ = ch; - } - if (t->type == TOKEN822_QUOTE) *s++ = '"'; - if (t->type == TOKEN822_LITERAL) *s++ = ']'; - if (t->type == TOKEN822_COMMENT) *s++ = ')'; - break; + case TOKEN822_COMMA: *s++ = ','; +#define NSUW \ + s[0] = '\n'; \ + s[1] = ' '; \ + if (linee && (!linelen || (s - lineb <= linelen))) { \ + while (linee < s) { \ + linee[0] = linee[2]; \ + ++linee; \ + } \ + linee -= 2; \ + } else { \ + if (linee) lineb = linee + 1; \ + linee = s; \ + s += 2; \ + } + NSUW break; + case TOKEN822_AT: *s++ = '@'; break; + case TOKEN822_DOT: *s++ = '.'; break; + case TOKEN822_LEFT: *s++ = '<'; break; + case TOKEN822_RIGHT: *s++ = '>'; break; + case TOKEN822_SEMI: *s++ = ';'; break; + case TOKEN822_COLON: *s++ = ':'; break; + case TOKEN822_ATOM: + case TOKEN822_QUOTE: + case TOKEN822_LITERAL: + case TOKEN822_COMMENT: + if (t->type == TOKEN822_QUOTE) *s++ = '"'; + if (t->type == TOKEN822_LITERAL) *s++ = '['; + if (t->type == TOKEN822_COMMENT) *s++ = '('; + + for (j = 0; j < t->slen; ++j) switch (ch = t->s[j]) + { + case '"': + case '[': + case ']': + case '(': + case ')': + case '\\': + case '\r': + case '\n': *s++ = '\\'; + default: *s++ = ch; + } + if (t->type == TOKEN822_QUOTE) *s++ = '"'; + if (t->type == TOKEN822_LITERAL) *s++ = ']'; + if (t->type == TOKEN822_COMMENT) *s++ = ')'; + break; } } - NSUW - --s; - sa->len = s - sa->s; - return 1; + NSUW-- s; + sa->len = s - sa->s; + return 1; } -int token822_unquote(stralloc *sa,token822_alloc *ta) +int token822_unquote(stralloc *sa, token822_alloc *ta) { struct token822 *t; int len; @@ -176,17 +216,20 @@ int token822_unquote(stralloc *sa,token822_alloc *ta) for (i = 0; i < ta->len; ++i) { t = ta->t + i; switch (t->type) { - case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: - case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON: - ++len; break; - case TOKEN822_LITERAL: - len += 2; - case TOKEN822_ATOM: case TOKEN822_QUOTE: - len += t->slen; + case TOKEN822_COMMA: + case TOKEN822_AT: + case TOKEN822_DOT: + case TOKEN822_LEFT: + case TOKEN822_RIGHT: + case TOKEN822_SEMI: + case TOKEN822_COLON: ++len; break; + case TOKEN822_LITERAL: len += 2; + case TOKEN822_ATOM: + case TOKEN822_QUOTE: len += t->slen; } } - if (!stralloc_ready(sa,len)) return -1; + if (!stralloc_ready(sa, len)) return -1; s = sa->s; @@ -194,16 +237,17 @@ int token822_unquote(stralloc *sa,token822_alloc *ta) t = ta->t + i; switch (t->type) { case TOKEN822_COMMA: *s++ = ','; break; - case TOKEN822_AT: *s++ = '@'; break; - case TOKEN822_DOT: *s++ = '.'; break; - case TOKEN822_LEFT: *s++ = '<'; break; + case TOKEN822_AT: *s++ = '@'; break; + case TOKEN822_DOT: *s++ = '.'; break; + case TOKEN822_LEFT: *s++ = '<'; break; case TOKEN822_RIGHT: *s++ = '>'; break; - case TOKEN822_SEMI: *s++ = ';'; break; + case TOKEN822_SEMI: *s++ = ';'; break; case TOKEN822_COLON: *s++ = ':'; break; - case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: + case TOKEN822_ATOM: + case TOKEN822_QUOTE: + case TOKEN822_LITERAL: if (t->type == TOKEN822_LITERAL) *s++ = '['; - for (j = 0; j < t->slen; ++j) - *s++ = t->s[j]; + for (j = 0; j < t->slen; ++j) *s++ = t->s[j]; if (t->type == TOKEN822_LITERAL) *s++ = ']'; break; case TOKEN822_COMMENT: break; @@ -213,7 +257,7 @@ int token822_unquote(stralloc *sa,token822_alloc *ta) return 1; } -int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) +int token822_parse(token822_alloc *ta, stralloc *sa, stralloc *buf) { int i; int salen; @@ -228,13 +272,23 @@ int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) numchars = 0; numtoks = 0; - for (i = 0; i < salen; ++i) - switch (sa->s[i]) { - case '.': case ',': case '@': case '<': case '>': case ':': case ';': - ++numtoks; break; - case ' ': case '\t': case '\r': case '\n': break; - case ')': case ']': return 0; - /* other control chars and non-ASCII chars are also bad, in theory */ + for (i = 0; i < salen; ++i) switch (sa->s[i]) + { + case '.': + case ',': + case '@': + case '<': + case '>': + case ':': + case ';': ++numtoks; break; + case ' ': + case '\t': + case '\r': + case '\n': break; + case ')': + case ']': + return 0; + /* other control chars and non-ASCII chars are also bad, in theory */ case '(': level = 1; while (level) { @@ -242,8 +296,9 @@ int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) switch (sa->s[i]) { case '(': ++level; break; case ')': --level; break; - case '\\': if (++i >= salen) return 0; - default: ++numchars; + case '\\': + if (++i >= salen) return 0; + default: ++numchars; } } ++numtoks; @@ -254,7 +309,8 @@ int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) if (++i >= salen) return 0; switch (sa->s[i]) { case '"': --level; break; - case '\\': if (++i >= salen) return 0; + case '\\': + if (++i >= salen) return 0; default: ++numchars; } } @@ -266,84 +322,120 @@ int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) if (++i >= salen) return 0; switch (sa->s[i]) { case ']': --level; break; - case '\\': if (++i >= salen) return 0; - default: ++numchars; + case '\\': + if (++i >= salen) return 0; + default: ++numchars; } } ++numtoks; break; default: do { - if (sa->s[i] == '\\') if (++i >= salen) break; + if (sa->s[i] == '\\') + if (++i >= salen) break; ++numchars; - if (++i >= salen) break; + if (++i >= salen) break; } while (atomok(sa->s[i])); --i; ++numtoks; } - if (!token822_ready(ta,numtoks)) return -1; - if (!stralloc_ready(buf,numchars)) return -1; + if (!token822_ready(ta, numtoks)) return -1; + if (!stralloc_ready(buf, numchars)) return -1; cbuf = buf->s; ta->len = numtoks; t = ta->t; - for (i = 0; i < salen; ++i) - switch (sa->s[i]) { - case '.': t->type = TOKEN822_DOT; ++t; break; - case ',': t->type = TOKEN822_COMMA; ++t; break; - case '@': t->type = TOKEN822_AT; ++t; break; - case '<': t->type = TOKEN822_LEFT; ++t; break; - case '>': t->type = TOKEN822_RIGHT; ++t; break; - case ':': t->type = TOKEN822_COLON; ++t; break; - case ';': t->type = TOKEN822_SEMI; ++t; break; - case ' ': case '\t': case '\r': case '\n': break; + for (i = 0; i < salen; ++i) switch (sa->s[i]) + { + case '.': + t->type = TOKEN822_DOT; + ++t; + break; + case ',': + t->type = TOKEN822_COMMA; + ++t; + break; + case '@': + t->type = TOKEN822_AT; + ++t; + break; + case '<': + t->type = TOKEN822_LEFT; + ++t; + break; + case '>': + t->type = TOKEN822_RIGHT; + ++t; + break; + case ':': + t->type = TOKEN822_COLON; + ++t; + break; + case ';': + t->type = TOKEN822_SEMI; + ++t; + break; + case ' ': + case '\t': + case '\r': + case '\n': break; case '(': - t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0; + t->type = TOKEN822_COMMENT; + t->s = cbuf; + t->slen = 0; level = 1; while (level) { ++i; /* assert: < salen */ switch (sa->s[i]) { - case '(': ++level; break; - case ')': --level; break; + case '(': ++level; break; + case ')': --level; break; case '\\': ++i; /* assert: < salen */ - default: *cbuf++ = sa->s[i]; ++t->slen; + default: *cbuf++ = sa->s[i]; ++t->slen; } } ++t; break; case '"': - t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0; + t->type = TOKEN822_QUOTE; + t->s = cbuf; + t->slen = 0; level = 1; while (level) { ++i; /* assert: < salen */ switch (sa->s[i]) { - case '"': --level; break; + case '"': --level; break; case '\\': ++i; /* assert: < salen */ - default: *cbuf++ = sa->s[i]; ++t->slen; + default: *cbuf++ = sa->s[i]; ++t->slen; } } ++t; break; case '[': - t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0; + t->type = TOKEN822_LITERAL; + t->s = cbuf; + t->slen = 0; level = 1; while (level) { ++i; /* assert: < salen */ switch (sa->s[i]) { - case ']': --level; break; + case ']': --level; break; case '\\': ++i; /* assert: < salen */ - default: *cbuf++ = sa->s[i]; ++t->slen; + default: *cbuf++ = sa->s[i]; ++t->slen; } } ++t; break; default: - t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0; + t->type = TOKEN822_ATOM; + t->s = cbuf; + t->slen = 0; do { - if (sa->s[i] == '\\') if (++i >= salen) break; - *cbuf++ = sa->s[i]; ++t->slen; + if (sa->s[i] == '\\') + if (++i >= salen) break; + *cbuf++ = sa->s[i]; + ++t->slen; if (++i >= salen) break; } while (atomok(sa->s[i])); atomcheck(t); @@ -353,24 +445,22 @@ int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) return 1; } -static int gotaddr(token822_alloc *taout,token822_alloc *taaddr,int (*callback)()) +static int gotaddr(token822_alloc *taout, token822_alloc *taaddr, int (*callback)()) { int i; - if (callback(taaddr) != 1) - return 0; + if (callback(taaddr) != 1) return 0; + + if (!token822_readyplus(taout, taaddr->len)) return 0; - if (!token822_readyplus(taout,taaddr->len)) - return 0; - - for (i = 0; i < taaddr->len; ++i) - taout->t[taout->len++] = taaddr->t[i]; + for (i = 0; i < taaddr->len; ++i) taout->t[taout->len++] = taaddr->t[i]; taaddr->len = 0; return 1; } -int token822_addrlist(token822_alloc *taout,token822_alloc *taaddr,token822_alloc *ta,int (*callback)()) +int token822_addrlist( + token822_alloc *taout, token822_alloc *taaddr, token822_alloc *ta, int (*callback)()) { struct token822 *t; struct token822 *beginning; @@ -380,23 +470,29 @@ int token822_addrlist(token822_alloc *taout,token822_alloc *taaddr,token822_allo taout->len = 0; taaddr->len = 0; - if (!token822_readyplus(taout,1)) return -1; - if (!token822_readyplus(taaddr,1)) return -1; - + if (!token822_readyplus(taout, 1)) return -1; + if (!token822_readyplus(taaddr, 1)) return -1; + ingroup = 0; wordok = 1; beginning = ta->t + 2; t = ta->t + ta->len - 1; - /* rfc 822 address lists are easy to parse from right to left */ + /* rfc 822 address lists are easy to parse from right to left */ -#define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1; -#define FLUSHCOMMA if (taaddr->len) { \ -if (!gotaddr(taout,taaddr,callback)) return -1; \ -if (!token822_append(taout,&comma)) return -1; } -#define ADDRLEFT if (!token822_append(taaddr,t--)) return -1; -#define OUTLEFT if (!token822_append(taout,t--)) return -1; +#define FLUSH \ + if (taaddr->len) \ + if (!gotaddr(taout, taaddr, callback)) return -1; +#define FLUSHCOMMA \ + if (taaddr->len) { \ + if (!gotaddr(taout, taaddr, callback)) return -1; \ + if (!token822_append(taout, &comma)) return -1; \ + } +#define ADDRLEFT \ + if (!token822_append(taaddr, t--)) return -1; +#define OUTLEFT \ + if (!token822_append(taout, t--)) return -1; while (t >= beginning) { switch (t->type) { @@ -410,35 +506,34 @@ if (!token822_append(taout,&comma)) return -1; } FLUSH if (!ingroup) return 0; ingroup = 0; - while ((t >= beginning) && (t->type != TOKEN822_COMMA)) - OUTLEFT - if (t >= beginning) - OUTLEFT + while ((t >= beginning) && (t->type != TOKEN822_COMMA)) OUTLEFT + if (t >= beginning) OUTLEFT wordok = 1; continue; case TOKEN822_RIGHT: FLUSHCOMMA OUTLEFT - while ((t >= beginning) && (t->type != TOKEN822_LEFT)) - ADDRLEFT - /* important to use address here even if it's empty: <> */ - if (!gotaddr(taout,taaddr,callback)) return -1; + while ((t >= beginning) && (t->type != TOKEN822_LEFT)) ADDRLEFT + /* important to use address here even if it's empty: <> */ + if (!gotaddr(taout, taaddr, callback)) return -1; if (t < beginning) return 0; OUTLEFT - while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || - (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || - (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT))) - OUTLEFT + while ( + (t >= beginning) + && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) + || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT))) + OUTLEFT wordok = 0; continue; - case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: - if (!wordok) - FLUSHCOMMA + case TOKEN822_ATOM: + case TOKEN822_QUOTE: + case TOKEN822_LITERAL: + if (!wordok) FLUSHCOMMA wordok = 0; ADDRLEFT continue; case TOKEN822_COMMENT: - /* comment is lexically a space; shouldn't affect wordok */ + /* comment is lexically a space; shouldn't affect wordok */ break; case TOKEN822_COMMA: FLUSH @@ -448,13 +543,13 @@ if (!token822_append(taout,&comma)) return -1; } wordok = 1; ADDRLEFT continue; - } - OUTLEFT - } + } + OUTLEFT + } FLUSH ++t; while (t > ta->t) - if (!token822_append(taout,--t)) return -1; + if (!token822_append(taout, --t)) return -1; token822_reverse(taout); return 1; |