diff options
Diffstat (limited to 'src/token822.c')
-rw-r--r-- | src/token822.c | 461 |
1 files changed, 461 insertions, 0 deletions
diff --git a/src/token822.c b/src/token822.c new file mode 100644 index 0000000..239887c --- /dev/null +++ b/src/token822.c @@ -0,0 +1,461 @@ +#include "stralloc.h" +#include "alloc.h" +#include "genalloc.h" +#include "str.h" +#include "token822.h" + +static struct token822 comma = { TOKEN822_COMMA }; + +void token822_reverse(token822_alloc *ta) +{ + int i; + int n; + struct token822 temp; + + n = ta->len - 1; + for (i = 0; i + i < n; ++i) { + temp = ta->t[i]; + ta->t[i] = ta->t[n - i]; + ta->t[n - i] = temp; + } +} + +GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready) +GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus) +GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append) + +static int needspace(int t1,int t2) +{ + if (!t1) return 0; + if (t1 == TOKEN822_COLON) return 1; + if (t1 == TOKEN822_COMMA) return 1; + if (t2 == TOKEN822_LEFT) return 1; + + switch (t1) { + case TOKEN822_ATOM: case TOKEN822_LITERAL: + case TOKEN822_QUOTE: case TOKEN822_COMMENT: + switch (t2) { + case TOKEN822_ATOM: case TOKEN822_LITERAL: + case TOKEN822_QUOTE: case TOKEN822_COMMENT: + return 1; + } + } + return 0; +} + +static int atomok(char ch) +{ + switch (ch) { + case ' ': case '\t': case '\r': case '\n': + case '(': case '[': case '"': + case '<': case '>': case ';': case ':': + case '@': case ',': case '.': + return 0; + } + return 1; +} + +static void atomcheck(struct token822 *t) +{ + int i; + char ch; + + for (i = 0; i < t->slen; ++i) { + ch = t->s[i]; + if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\')) { + t->type = TOKEN822_QUOTE; + return; + } + } +} + +int token822_unparse(stralloc *sa,token822_alloc *ta,unsigned int linelen) +{ + struct token822 *t; + int len; + int ch; + int i; + int j; + int lasttype; + int newtype; + char *s; + char *lineb; + char *linee; + + len = 0; + lasttype = 0; + + for (i = 0; i < ta->len; ++i) { + t = ta->t + i; + newtype = t->type; + if (needspace(lasttype,newtype)) ++len; + lasttype = newtype; + + switch (newtype) { + case TOKEN822_COMMA: + len += 3; break; + case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT: + case TOKEN822_SEMI: case TOKEN822_COLON: + ++len; break; + case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: + if (t->type != TOKEN822_ATOM) len += 2; + for (j = 0; j < t->slen; ++j) + switch (ch = t->s[j]) { + case '"': case '[': case ']': case '(': case ')': + case '\\': case '\r': case '\n': ++len; + default: ++len; + } + break; + } + } + len += 2; + + if (!stralloc_ready(sa,len)) return -1; + + s = sa->s; + lineb = s; + linee = 0; + + lasttype = 0; + + for (i = 0; i < ta->len; ++i) { + t = ta->t + i; + newtype = t->type; + if (needspace(lasttype,newtype)) *s++ = ' '; + lasttype = newtype; + + switch (newtype) { + case TOKEN822_COMMA: + *s++ = ','; +#define NSUW \ + s[0] = '\n'; s[1] = ' '; \ + if (linee && (!linelen || (s - lineb <= linelen))) \ + { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \ + else { if (linee) lineb = linee + 1; linee = s; s += 2; } + NSUW + break; + case TOKEN822_AT: *s++ = '@'; break; + case TOKEN822_DOT: *s++ = '.'; break; + case TOKEN822_LEFT: *s++ = '<'; break; + case TOKEN822_RIGHT: *s++ = '>'; break; + case TOKEN822_SEMI: *s++ = ';'; break; + case TOKEN822_COLON: *s++ = ':'; break; + case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: + if (t->type == TOKEN822_QUOTE) *s++ = '"'; + if (t->type == TOKEN822_LITERAL) *s++ = '['; + if (t->type == TOKEN822_COMMENT) *s++ = '('; + + for (j = 0; j < t->slen; ++j) + switch (ch = t->s[j]) { + case '"': case '[': case ']': case '(': case ')': + case '\\': case '\r': case '\n': *s++ = '\\'; + default: *s++ = ch; + } + if (t->type == TOKEN822_QUOTE) *s++ = '"'; + if (t->type == TOKEN822_LITERAL) *s++ = ']'; + if (t->type == TOKEN822_COMMENT) *s++ = ')'; + break; + } + } + NSUW + --s; + sa->len = s - sa->s; + return 1; +} + +int token822_unquote(stralloc *sa,token822_alloc *ta) +{ + struct token822 *t; + int len; + int i; + int j; + char *s; + + len = 0; + + for (i = 0; i < ta->len; ++i) { + t = ta->t + i; + switch (t->type) { + case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: + case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON: + ++len; break; + case TOKEN822_LITERAL: + len += 2; + case TOKEN822_ATOM: case TOKEN822_QUOTE: + len += t->slen; + } + } + + if (!stralloc_ready(sa,len)) return -1; + + s = sa->s; + + for (i = 0; i < ta->len; ++i) { + t = ta->t + i; + switch (t->type) { + case TOKEN822_COMMA: *s++ = ','; break; + case TOKEN822_AT: *s++ = '@'; break; + case TOKEN822_DOT: *s++ = '.'; break; + case TOKEN822_LEFT: *s++ = '<'; break; + case TOKEN822_RIGHT: *s++ = '>'; break; + case TOKEN822_SEMI: *s++ = ';'; break; + case TOKEN822_COLON: *s++ = ':'; break; + case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: + if (t->type == TOKEN822_LITERAL) *s++ = '['; + for (j = 0; j < t->slen; ++j) + *s++ = t->s[j]; + if (t->type == TOKEN822_LITERAL) *s++ = ']'; + break; + case TOKEN822_COMMENT: break; + } + } + sa->len = s - sa->s; + return 1; +} + +int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf) +{ + int i; + int salen; + int level; + struct token822 *t; + int numtoks; + int numchars; + char *cbuf; + + salen = sa->len; + + numchars = 0; + numtoks = 0; + + for (i = 0; i < salen; ++i) + switch (sa->s[i]) { + case '.': case ',': case '@': case '<': case '>': case ':': case ';': + ++numtoks; break; + case ' ': case '\t': case '\r': case '\n': break; + case ')': case ']': return 0; + /* other control chars and non-ASCII chars are also bad, in theory */ + case '(': + level = 1; + while (level) { + if (++i >= salen) return 0; + switch (sa->s[i]) { + case '(': ++level; break; + case ')': --level; break; + case '\\': if (++i >= salen) return 0; + default: ++numchars; + } + } + ++numtoks; + break; + case '"': + level = 1; + while (level) { + if (++i >= salen) return 0; + switch (sa->s[i]) { + case '"': --level; break; + case '\\': if (++i >= salen) return 0; + default: ++numchars; + } + } + ++numtoks; + break; + case '[': + level = 1; + while (level) { + if (++i >= salen) return 0; + switch (sa->s[i]) { + case ']': --level; break; + case '\\': if (++i >= salen) return 0; + default: ++numchars; + } + } + ++numtoks; + break; + default: + do { + if (sa->s[i] == '\\') if (++i >= salen) break; + ++numchars; + if (++i >= salen) break; + } while (atomok(sa->s[i])); + --i; + ++numtoks; + } + + if (!token822_ready(ta,numtoks)) return -1; + if (!stralloc_ready(buf,numchars)) return -1; + cbuf = buf->s; + ta->len = numtoks; + + t = ta->t; + + for (i = 0; i < salen; ++i) + switch (sa->s[i]) { + case '.': t->type = TOKEN822_DOT; ++t; break; + case ',': t->type = TOKEN822_COMMA; ++t; break; + case '@': t->type = TOKEN822_AT; ++t; break; + case '<': t->type = TOKEN822_LEFT; ++t; break; + case '>': t->type = TOKEN822_RIGHT; ++t; break; + case ':': t->type = TOKEN822_COLON; ++t; break; + case ';': t->type = TOKEN822_SEMI; ++t; break; + case ' ': case '\t': case '\r': case '\n': break; + case '(': + t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0; + level = 1; + while (level) { + ++i; /* assert: < salen */ + switch (sa->s[i]) { + case '(': ++level; break; + case ')': --level; break; + case '\\': ++i; /* assert: < salen */ + default: *cbuf++ = sa->s[i]; ++t->slen; + } + } + ++t; + break; + case '"': + t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0; + level = 1; + while (level) { + ++i; /* assert: < salen */ + switch (sa->s[i]) { + case '"': --level; break; + case '\\': ++i; /* assert: < salen */ + default: *cbuf++ = sa->s[i]; ++t->slen; + } + } + ++t; + break; + case '[': + t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0; + level = 1; + while (level) { + ++i; /* assert: < salen */ + switch (sa->s[i]) { + case ']': --level; break; + case '\\': ++i; /* assert: < salen */ + default: *cbuf++ = sa->s[i]; ++t->slen; + } + } + ++t; + break; + default: + t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0; + do { + if (sa->s[i] == '\\') if (++i >= salen) break; + *cbuf++ = sa->s[i]; ++t->slen; + if (++i >= salen) break; + } while (atomok(sa->s[i])); + atomcheck(t); + --i; + ++t; + } + return 1; +} + +static int gotaddr(token822_alloc *taout,token822_alloc *taaddr,int (*callback)()) +{ + int i; + + if (callback(taaddr) != 1) + return 0; + + if (!token822_readyplus(taout,taaddr->len)) + return 0; + + for (i = 0; i < taaddr->len; ++i) + taout->t[taout->len++] = taaddr->t[i]; + + taaddr->len = 0; + return 1; +} + +int token822_addrlist(token822_alloc *taout,token822_alloc *taaddr,token822_alloc *ta,int (*callback)()) +{ + struct token822 *t; + struct token822 *beginning; + int ingroup; + int wordok; + + taout->len = 0; + taaddr->len = 0; + + if (!token822_readyplus(taout,1)) return -1; + if (!token822_readyplus(taaddr,1)) return -1; + + ingroup = 0; + wordok = 1; + + beginning = ta->t + 2; + t = ta->t + ta->len - 1; + + /* rfc 822 address lists are easy to parse from right to left */ + +#define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1; +#define FLUSHCOMMA if (taaddr->len) { \ +if (!gotaddr(taout,taaddr,callback)) return -1; \ +if (!token822_append(taout,&comma)) return -1; } +#define ADDRLEFT if (!token822_append(taaddr,t--)) return -1; +#define OUTLEFT if (!token822_append(taout,t--)) return -1; + + while (t >= beginning) { + switch (t->type) { + case TOKEN822_SEMI: + FLUSHCOMMA + if (ingroup) return 0; + ingroup = 1; + wordok = 1; + break; + case TOKEN822_COLON: + FLUSH + if (!ingroup) return 0; + ingroup = 0; + while ((t >= beginning) && (t->type != TOKEN822_COMMA)) + OUTLEFT + if (t >= beginning) + OUTLEFT + wordok = 1; + continue; + case TOKEN822_RIGHT: + FLUSHCOMMA + OUTLEFT + while ((t >= beginning) && (t->type != TOKEN822_LEFT)) + ADDRLEFT + /* important to use address here even if it's empty: <> */ + if (!gotaddr(taout,taaddr,callback)) return -1; + if (t < beginning) return 0; + OUTLEFT + while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || + (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || + (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT))) + OUTLEFT + wordok = 0; + continue; + case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: + if (!wordok) + FLUSHCOMMA + wordok = 0; + ADDRLEFT + continue; + case TOKEN822_COMMENT: + /* comment is lexically a space; shouldn't affect wordok */ + break; + case TOKEN822_COMMA: + FLUSH + wordok = 1; + break; + default: + wordok = 1; + ADDRLEFT + continue; + } + OUTLEFT + } + FLUSH + ++t; + while (t > ta->t) + if (!token822_append(taout,--t)) return -1; + + token822_reverse(taout); + return 1; +} |