summary refs log tree commit diff
path: root/src/token822.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/token822.c')
-rw-r--r-- src/token822.c 461
1 files changed, 461 insertions, 0 deletions
diff --git a/src/token822.c b/src/token822.c
new file mode 100644
index 0000000..239887c
--- /dev/null
+++ b/src/token822.c
@@ -0,0 +1,461 @@
+#include "stralloc.h"
+#include "alloc.h"
+#include "genalloc.h"
+#include "str.h"
+#include "token822.h"
+
+/* Shared COMMA token; token822_addrlist() appends it to its output after each flushed address. */
+static struct token822 comma = { TOKEN822_COMMA };
+
+/* Reverse, in place, the order of the tokens held in ta. */
+void token822_reverse(token822_alloc *ta)
+{
+  int lo;
+  int hi;
+  struct token822 swap;
+
+  /* walk inward from both ends until the two indices meet */
+  lo = 0;
+  hi = ta->len - 1;
+  while (lo < hi) {
+    swap = ta->t[lo];
+    ta->t[lo] = ta->t[hi];
+    ta->t[hi] = swap;
+    ++lo;
+    --hi;
+  }
+}
+
+/*
+ * Instantiate the GEN_ALLOC_* templates (presumably from genalloc.h) for
+ * token822_alloc.  The last macro argument names the generated routine:
+ * token822_ready, token822_readyplus and token822_append, all of which are
+ * called further down in this file.
+ * NOTE(review): the constant 30 looks like a growth/slack parameter of the
+ * template -- confirm against genalloc.h.
+ */
+GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_ready)
+GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus)
+GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,i,n,x,30,token822_readyplus,token822_append)
+
+/*
+ * Decide whether a blank must be written between two adjacent tokens.
+ * t1 is the type of the token already written (0 when there is none yet),
+ * t2 the type of the token about to be written.
+ * Returns 1 when a space is required, 0 otherwise.
+ */
+static int needspace(int t1,int t2)
+{
+  int prevtext;
+  int nexttext;
+
+  if (!t1) return 0;
+  if ((t1 == TOKEN822_COLON) || (t1 == TOKEN822_COMMA)) return 1;
+  if (t2 == TOKEN822_LEFT) return 1;
+
+  /* two text-bearing tokens in a row would run together without a blank */
+  prevtext = (t1 == TOKEN822_ATOM) || (t1 == TOKEN822_LITERAL)
+          || (t1 == TOKEN822_QUOTE) || (t1 == TOKEN822_COMMENT);
+  nexttext = (t2 == TOKEN822_ATOM) || (t2 == TOKEN822_LITERAL)
+          || (t2 == TOKEN822_QUOTE) || (t2 == TOKEN822_COMMENT);
+
+  return prevtext && nexttext;
+}
+
+/*
+ * Return 0 if ch is whitespace or one of the characters that ends or
+ * cannot be part of an atom ( ( [ " < > ; : @ , . ), 1 for any other byte.
+ */
+static int atomok(char ch)
+{
+  static const char special[] = " \t\r\n([\"<>;:@,.";
+  unsigned int k;
+
+  /* sizeof - 1: the terminating NUL of the table is not itself a special */
+  for (k = 0; k < sizeof special - 1; ++k)
+    if (special[k] == ch) return 0;
+  return 1;
+}
+
+/*
+ * Inspect the text of token t.  If any byte has a char value below 32 or
+ * above 126, or is ')', ']' or a backslash, the token is retagged as
+ * TOKEN822_QUOTE; otherwise its type is left as it was.
+ */
+static void atomcheck(struct token822 *t)
+{
+  int pos;
+  char c;
+
+  pos = 0;
+  while (pos < t->slen) {
+    c = t->s[pos++];
+    if ((c >= 32) && (c <= 126) && (c != ')') && (c != ']') && (c != '\\'))
+      continue;
+    /* first offending byte settles it */
+    t->type = TOKEN822_QUOTE;
+    return;
+  }
+}
+
+/*
+ * Render the tokens of ta as text in sa.
+ *
+ * Pass 1 computes an upper bound on the output size so that a single
+ * stralloc_ready() call is enough; pass 2 writes the bytes.  A "\n " break
+ * is written after every comma and once more at the very end.  When the next
+ * break is reached, the previous one is taken back out if linelen is 0 or
+ * if the text since the start of the line still fits in linelen (see NSUW).
+ *
+ * Returns 1 on success, -1 if stralloc_ready() fails.
+ */
+int token822_unparse(stralloc *sa,token822_alloc *ta,unsigned int linelen)
+{
+ struct token822 *t;
+ int len;
+ int ch;
+ int i;
+ int j;
+ int lasttype;
+ int newtype;
+ char *s;
+ char *lineb;
+ char *linee;
+
+ /* pass 1: size estimate */
+ len = 0;
+ lasttype = 0;
+
+ for (i = 0; i < ta->len; ++i) {
+ t = ta->t + i;
+ newtype = t->type;
+ if (needspace(lasttype,newtype)) ++len;
+ lasttype = newtype;
+
+ switch (newtype) {
+ case TOKEN822_COMMA:
+ /* the comma itself plus the two bytes NSUW may add */
+ len += 3; break;
+ case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT:
+ case TOKEN822_SEMI: case TOKEN822_COLON:
+ ++len; break;
+ case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
+ /* everything but an atom gets an opening and a closing delimiter */
+ if (t->type != TOKEN822_ATOM) len += 2;
+ for (j = 0; j < t->slen; ++j)
+ switch (ch = t->s[j]) {
+ /* bytes that pass 2 escapes with a backslash: one extra, then fall through */
+ case '"': case '[': case ']': case '(': case ')':
+ case '\\': case '\r': case '\n': ++len;
+ default: ++len;
+ }
+ break;
+ }
+ }
+ /* room for the NSUW that follows the last token */
+ len += 2;
+
+ if (!stralloc_ready(sa,len)) return -1;
+
+ /* pass 2: emit.  lineb is the start of the current output line; linee is
+    the position of the most recent "\n " break, or 0 while there is none. */
+ s = sa->s;
+ lineb = s;
+ linee = 0;
+
+ lasttype = 0;
+
+ for (i = 0; i < ta->len; ++i) {
+ t = ta->t + i;
+ newtype = t->type;
+ if (needspace(lasttype,newtype)) *s++ = ' ';
+ lasttype = newtype;
+
+ switch (newtype) {
+ case TOKEN822_COMMA:
+ *s++ = ',';
+ /* NSUW: place a "\n " break at s.  If an earlier break exists and either
+    linelen is 0 or the line up to s fits in linelen, the earlier break is
+    squeezed out by shifting everything after it (including the new break)
+    down two bytes, so s itself does not move.  Otherwise the earlier break
+    is kept, lineb advances past it, and s steps over the new break.
+    The macro stays defined after this function (there is no #undef). */
+#define NSUW \
+ s[0] = '\n'; s[1] = ' '; \
+ if (linee && (!linelen || (s - lineb <= linelen))) \
+ { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \
+ else { if (linee) lineb = linee + 1; linee = s; s += 2; }
+ NSUW
+ break;
+ case TOKEN822_AT: *s++ = '@'; break;
+ case TOKEN822_DOT: *s++ = '.'; break;
+ case TOKEN822_LEFT: *s++ = '<'; break;
+ case TOKEN822_RIGHT: *s++ = '>'; break;
+ case TOKEN822_SEMI: *s++ = ';'; break;
+ case TOKEN822_COLON: *s++ = ':'; break;
+ case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT:
+ if (t->type == TOKEN822_QUOTE) *s++ = '"';
+ if (t->type == TOKEN822_LITERAL) *s++ = '[';
+ if (t->type == TOKEN822_COMMENT) *s++ = '(';
+
+ for (j = 0; j < t->slen; ++j)
+ switch (ch = t->s[j]) {
+ /* write a backslash first, then fall through to write the byte itself */
+ case '"': case '[': case ']': case '(': case ')':
+ case '\\': case '\r': case '\n': *s++ = '\\';
+ default: *s++ = ch;
+ }
+ if (t->type == TOKEN822_QUOTE) *s++ = '"';
+ if (t->type == TOKEN822_LITERAL) *s++ = ']';
+ if (t->type == TOKEN822_COMMENT) *s++ = ')';
+ break;
+ }
+ }
+ NSUW
+ /* either NSUW branch leaves "\n " just before s; drop the blank so the
+    output ends with the newline */
+ --s;
+ sa->len = s - sa->s;
+ return 1;
+}
+
+/*
+ * Write the tokens of ta into sa without any quoting or escaping:
+ * punctuation tokens become their single character, atoms and quoted
+ * strings contribute their raw text, literals are wrapped in '[' and ']',
+ * and comments produce nothing.
+ * Returns 1 on success, -1 if stralloc_ready() fails.
+ */
+int token822_unquote(stralloc *sa,token822_alloc *ta)
+{
+  struct token822 *tok;
+  char *out;
+  char punct;
+  int bracket;
+  int need;
+  int i;
+  int k;
+
+  /* first sweep: how many bytes the result occupies */
+  need = 0;
+  for (i = 0; i < ta->len; ++i) {
+    tok = ta->t + i;
+    switch (tok->type) {
+      case TOKEN822_LITERAL:
+        need += 2;
+        need += tok->slen;
+        break;
+      case TOKEN822_ATOM:
+      case TOKEN822_QUOTE:
+        need += tok->slen;
+        break;
+      case TOKEN822_COMMA:
+      case TOKEN822_AT:
+      case TOKEN822_DOT:
+      case TOKEN822_LEFT:
+      case TOKEN822_RIGHT:
+      case TOKEN822_SEMI:
+      case TOKEN822_COLON:
+        need += 1;
+        break;
+    }
+  }
+
+  if (!stralloc_ready(sa,need)) return -1;
+
+  /* second sweep: copy the bytes out */
+  out = sa->s;
+  for (i = 0; i < ta->len; ++i) {
+    tok = ta->t + i;
+
+    punct = 0;
+    switch (tok->type) {
+      case TOKEN822_COMMA: punct = ','; break;
+      case TOKEN822_AT:    punct = '@'; break;
+      case TOKEN822_DOT:   punct = '.'; break;
+      case TOKEN822_LEFT:  punct = '<'; break;
+      case TOKEN822_RIGHT: punct = '>'; break;
+      case TOKEN822_SEMI:  punct = ';'; break;
+      case TOKEN822_COLON: punct = ':'; break;
+    }
+    if (punct) {
+      *out++ = punct;
+      continue;
+    }
+
+    bracket = (tok->type == TOKEN822_LITERAL);
+    if (!bracket && (tok->type != TOKEN822_ATOM) && (tok->type != TOKEN822_QUOTE))
+      continue; /* comments, and any type not listed above, emit nothing */
+
+    if (bracket) *out++ = '[';
+    for (k = 0; k < tok->slen; ++k)
+      *out++ = tok->s[k];
+    if (bracket) *out++ = ']';
+  }
+
+  sa->len = out - sa->s;
+  return 1;
+}
+
+/*
+ * Split the sa->len bytes at sa->s into tokens stored in ta.
+ *
+ * The text of atoms, quoted strings, domain literals and comments is copied
+ * into buf with the backslash of each escape removed; each such token's s
+ * pointer refers into buf->s.  buf->len is not updated by this function.
+ *
+ * Pass 1 validates the input and counts tokens and text bytes so that ta and
+ * buf can each be sized with one call; pass 2 repeats the same scan and
+ * fills them in.  The two passes must stay in step with each other.
+ *
+ * Returns 1 on success, 0 on a syntax error (an unmatched ')' or ']', or
+ * input that ends inside a comment, quoted string or literal), and -1 if
+ * token822_ready() or stralloc_ready() fails.
+ */
+int token822_parse(token822_alloc *ta,stralloc *sa,stralloc *buf)
+{
+ int i;
+ int salen;
+ int level;
+ struct token822 *t;
+ int numtoks;
+ int numchars;
+ char *cbuf;
+
+ salen = sa->len;
+
+ numchars = 0;
+ numtoks = 0;
+
+ /* pass 1: validate and count */
+ for (i = 0; i < salen; ++i)
+ switch (sa->s[i]) {
+ case '.': case ',': case '@': case '<': case '>': case ':': case ';':
+ ++numtoks; break;
+ case ' ': case '\t': case '\r': case '\n': break;
+ /* a closer seen here has no matching opener */
+ case ')': case ']': return 0;
+ /* other control chars and non-ASCII chars are also bad, in theory */
+ case '(':
+ /* comments nest, so track the parenthesis depth */
+ level = 1;
+ while (level) {
+ if (++i >= salen) return 0;
+ switch (sa->s[i]) {
+ case '(': ++level; break;
+ case ')': --level; break;
+ /* skip the backslash, then fall through to count the escaped byte */
+ case '\\': if (++i >= salen) return 0;
+ default: ++numchars;
+ }
+ }
+ ++numtoks;
+ break;
+ case '"':
+ level = 1;
+ while (level) {
+ if (++i >= salen) return 0;
+ switch (sa->s[i]) {
+ case '"': --level; break;
+ /* skip the backslash, then fall through to count the escaped byte */
+ case '\\': if (++i >= salen) return 0;
+ default: ++numchars;
+ }
+ }
+ ++numtoks;
+ break;
+ case '[':
+ level = 1;
+ while (level) {
+ if (++i >= salen) return 0;
+ switch (sa->s[i]) {
+ case ']': --level; break;
+ /* skip the backslash, then fall through to count the escaped byte */
+ case '\\': if (++i >= salen) return 0;
+ default: ++numchars;
+ }
+ }
+ ++numtoks;
+ break;
+ default:
+ /* atom: runs until a byte for which atomok() is 0 or the end of input;
+    a backslash makes the following byte part of the atom.  A backslash
+    that is the last input byte ends the atom without contributing text. */
+ do {
+ if (sa->s[i] == '\\') if (++i >= salen) break;
+ ++numchars;
+ if (++i >= salen) break;
+ } while (atomok(sa->s[i]));
+ /* step back so the for loop's ++i lands on the byte that ended the atom */
+ --i;
+ ++numtoks;
+ }
+
+ if (!token822_ready(ta,numtoks)) return -1;
+ if (!stralloc_ready(buf,numchars)) return -1;
+ cbuf = buf->s;
+ ta->len = numtoks;
+
+ t = ta->t;
+
+ /* pass 2: same scan, now storing tokens; the bounds checks of pass 1 are
+    not repeated because pass 1 already accepted this input */
+ for (i = 0; i < salen; ++i)
+ switch (sa->s[i]) {
+ case '.': t->type = TOKEN822_DOT; ++t; break;
+ case ',': t->type = TOKEN822_COMMA; ++t; break;
+ case '@': t->type = TOKEN822_AT; ++t; break;
+ case '<': t->type = TOKEN822_LEFT; ++t; break;
+ case '>': t->type = TOKEN822_RIGHT; ++t; break;
+ case ':': t->type = TOKEN822_COLON; ++t; break;
+ case ';': t->type = TOKEN822_SEMI; ++t; break;
+ case ' ': case '\t': case '\r': case '\n': break;
+ case '(':
+ t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0;
+ level = 1;
+ while (level) {
+ ++i; /* assert: < salen */
+ switch (sa->s[i]) {
+ /* nested parentheses change the depth but are not copied into the text */
+ case '(': ++level; break;
+ case ')': --level; break;
+ case '\\': ++i; /* assert: < salen */
+ default: *cbuf++ = sa->s[i]; ++t->slen;
+ }
+ }
+ ++t;
+ break;
+ case '"':
+ t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0;
+ level = 1;
+ while (level) {
+ ++i; /* assert: < salen */
+ switch (sa->s[i]) {
+ case '"': --level; break;
+ case '\\': ++i; /* assert: < salen */
+ default: *cbuf++ = sa->s[i]; ++t->slen;
+ }
+ }
+ ++t;
+ break;
+ case '[':
+ t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0;
+ level = 1;
+ while (level) {
+ ++i; /* assert: < salen */
+ switch (sa->s[i]) {
+ case ']': --level; break;
+ case '\\': ++i; /* assert: < salen */
+ default: *cbuf++ = sa->s[i]; ++t->slen;
+ }
+ }
+ ++t;
+ break;
+ default:
+ t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0;
+ do {
+ if (sa->s[i] == '\\') if (++i >= salen) break;
+ *cbuf++ = sa->s[i]; ++t->slen;
+ if (++i >= salen) break;
+ } while (atomok(sa->s[i]));
+ /* retag as QUOTE if the collected text contains bytes atomcheck() rejects */
+ atomcheck(t);
+ --i;
+ ++t;
+ }
+ return 1;
+}
+
+/*
+ * Hand the address collected in taaddr to callback and, if callback
+ * returns 1, append all of taaddr's tokens to taout and empty taaddr.
+ * Returns 1 on success; 0 if callback returned anything other than 1 or
+ * if taout could not be grown.
+ */
+static int gotaddr(token822_alloc *taout,token822_alloc *taaddr,int (*callback)())
+{
+  int k;
+
+  if ((callback(taaddr) != 1) || !token822_readyplus(taout,taaddr->len))
+    return 0;
+
+  k = 0;
+  while (k < taaddr->len) {
+    taout->t[taout->len] = taaddr->t[k];
+    ++taout->len;
+    ++k;
+  }
+
+  taaddr->len = 0;
+  return 1;
+}
+
+/*
+ * Walk the token list ta from its last token back to index 2, collecting
+ * each address into taaddr and passing it through callback (via gotaddr)
+ * before it is copied to taout.  Tokens before index 2 are not examined as
+ * addresses; they are copied to taout unchanged at the end (presumably the
+ * header field name and its colon -- confirm against callers).
+ *
+ * taout and taaddr are emptied first.  Output is accumulated right to left
+ * and reversed once at the end.
+ *
+ * Returns 1 on success; 0 on a structural error (';' inside a group, ':'
+ * without a preceding ';', or '>' with no matching '<'); -1 if an append or
+ * readyplus fails or if gotaddr() reports failure, which includes callback
+ * returning anything other than 1.
+ */
+int token822_addrlist(token822_alloc *taout,token822_alloc *taaddr,token822_alloc *ta,int (*callback)())
+{
+ struct token822 *t;
+ struct token822 *beginning;
+ int ingroup;
+ int wordok;
+
+ taout->len = 0;
+ taaddr->len = 0;
+
+ if (!token822_readyplus(taout,1)) return -1;
+ if (!token822_readyplus(taaddr,1)) return -1;
+
+ /* ingroup: set between a ';' and the ':' that opens that group (scanning
+    leftwards).  wordok: nonzero while a word token may join the address
+    being collected; once a word has been taken it is cleared, so a second
+    word directly to its left starts a new address. */
+ ingroup = 0;
+ wordok = 1;
+
+ beginning = ta->t + 2;
+ t = ta->t + ta->len - 1;
+
+ /* rfc 822 address lists are easy to parse from right to left */
+
+ /* FLUSH: if an address is pending, validate it and move it to taout.
+    FLUSHCOMMA: the same, followed by a COMMA token in taout.
+    ADDRLEFT / OUTLEFT: append *t to taaddr / taout and step t one token left.
+    All four expand to bare statements that may return -1 from this function. */
+#define FLUSH if (taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1;
+#define FLUSHCOMMA if (taaddr->len) { \
+if (!gotaddr(taout,taaddr,callback)) return -1; \
+if (!token822_append(taout,&comma)) return -1; }
+#define ADDRLEFT if (!token822_append(taaddr,t--)) return -1;
+#define OUTLEFT if (!token822_append(taout,t--)) return -1;
+
+ while (t >= beginning) {
+ switch (t->type) {
+ case TOKEN822_SEMI:
+ /* end of a group, seen first because of the right-to-left scan */
+ FLUSHCOMMA
+ if (ingroup) return 0;
+ ingroup = 1;
+ wordok = 1;
+ break;
+ case TOKEN822_COLON:
+ /* start of a group: copy the colon and everything to its left, up to and
+    including the previous comma, straight to the output */
+ FLUSH
+ if (!ingroup) return 0;
+ ingroup = 0;
+ while ((t >= beginning) && (t->type != TOKEN822_COMMA))
+ OUTLEFT
+ if (t >= beginning)
+ OUTLEFT
+ wordok = 1;
+ continue;
+ case TOKEN822_RIGHT:
+ /* <...>: the tokens between the angle brackets form the address */
+ FLUSHCOMMA
+ OUTLEFT
+ while ((t >= beginning) && (t->type != TOKEN822_LEFT))
+ ADDRLEFT
+ /* important to use address here even if it's empty: <> */
+ if (!gotaddr(taout,taaddr,callback)) return -1;
+ if (t < beginning) return 0;
+ OUTLEFT
+ /* tokens of these types to the left of '<' are passed through unchanged */
+ while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) ||
+ (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) ||
+ (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT)))
+ OUTLEFT
+ wordok = 0;
+ continue;
+ case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL:
+ /* a word right after another word begins a separate address */
+ if (!wordok)
+ FLUSHCOMMA
+ wordok = 0;
+ ADDRLEFT
+ continue;
+ case TOKEN822_COMMENT:
+ /* comment is lexically a space; shouldn't affect wordok */
+ break;
+ case TOKEN822_COMMA:
+ FLUSH
+ wordok = 1;
+ break;
+ default:
+ /* any other token type joins the pending address and lets a word follow */
+ wordok = 1;
+ ADDRLEFT
+ continue;
+ }
+ /* cases that end in break copy the current token to the output here */
+ OUTLEFT
+ }
+ FLUSH
+ /* copy whatever lies to the left of the scanned region, still right to left */
+ ++t;
+ while (t > ta->t)
+ if (!token822_append(taout,--t)) return -1;
+
+ /* taout was built back to front */
+ token822_reverse(taout);
+ return 1;
+}