[dovecot-cvs] dovecot/src/lib-mail Makefile.am, 1.10, 1.11 message-address.c, 1.8, 1.9 message-address.h, 1.2, 1.3 rfc822-parser.c, NONE, 1.1 rfc822-parser.h, NONE, 1.1

cras at dovecot.org cras at dovecot.org
Thu Jan 6 20:14:31 EET 2005


Update of /var/lib/cvs/dovecot/src/lib-mail
In directory talvi:/tmp/cvs-serv30594/lib-mail

Modified Files:
	Makefile.am message-address.c message-address.h 
Added Files:
	rfc822-parser.c rfc822-parser.h 
Log Message:
Added rfc822 parser which will probably replace message_tokenizer at some
point. Rewrote address parsing with the new parser. This fixes various bugs
in it.



Index: Makefile.am
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/Makefile.am,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- Makefile.am	20 Oct 2004 23:05:56 -0000	1.10
+++ Makefile.am	6 Jan 2005 18:14:28 -0000	1.11
@@ -17,7 +17,8 @@
 	message-send.c \
 	message-size.c \
 	message-tokenize.c \
-	quoted-printable.c
+	quoted-printable.c \
+	rfc822-parser.c
 
 noinst_HEADERS = \
 	istream-header-filter.h \
@@ -33,4 +34,5 @@
 	message-send.h \
 	message-size.h \
 	message-tokenize.h \
-	quoted-printable.h
+	quoted-printable.h \
+	rfc822-parser.h

Index: message-address.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-address.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- message-address.c	21 Sep 2003 16:21:37 -0000	1.8
+++ message-address.c	6 Jan 2005 18:14:28 -0000	1.9
@@ -1,223 +1,284 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2005 Timo Sirainen */
 
 #include "lib.h"
 #include "str.h"
-#include "message-tokenize.h"
+#include "message-parser.h"
 #include "message-address.h"
+#include "rfc822-parser.h"
 
-static struct message_address *
-new_address(pool_t pool, struct message_address ***next_addr)
+struct message_address_parser_context {
+	pool_t pool;
+	struct rfc822_parser_context parser;
+
+	struct message_address *first_addr, *last_addr, addr;
+	string_t *str;
+};
+
+static void add_address(struct message_address_parser_context *ctx)
 {
 	struct message_address *addr;
 
-	addr = p_new(pool, struct message_address, 1);
+	addr = p_new(ctx->pool, struct message_address, 1);
 
-	**next_addr = addr;
-	*next_addr = &addr->next;
+	memcpy(addr, &ctx->addr, sizeof(ctx->addr));
+	memset(&ctx->addr, 0, sizeof(ctx->addr));
 
-	return addr;
+	if (ctx->first_addr == NULL)
+		ctx->first_addr = addr;
+	else
+		ctx->last_addr->next = addr;
+	ctx->last_addr = addr;
 }
 
-struct message_address *
-message_address_parse(pool_t pool, const unsigned char *data, size_t size,
-		      unsigned int max_addresses)
+static int parse_local_part(struct message_address_parser_context *ctx)
 {
-	static const enum message_token stop_tokens_init[] =
-		{ ',', '@', '<', ':', TOKEN_LAST };
-	static const enum message_token stop_tokens_group[] =
-		{ ',', '@', '<', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_domain[] =
-		{ ',', '<', TOKEN_LAST };
-	static const enum message_token stop_tokens_domain_group[] =
-		{ ',', '<', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_post_addr[] =
-		{ ',', TOKEN_LAST };
-	static const enum message_token stop_tokens_post_addr_group[] =
-		{ ',', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_route[] =
-		{ ':', '>', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_mailbox[] =
-		{ '@', '>', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_domain[] =
-		{ '>', TOKEN_LAST };
+	int ret;
 
-	struct message_address *first_addr, **next_addr, *addr;
-	struct message_tokenizer *tok;
-	const enum message_token *stop_tokens;
-	enum message_token token;
-	string_t *mailbox, *domain, *route, *name, *comment, *next_phrase;
-	size_t len;
-	int ingroup, stop;
+	/*
+	   local-part      = dot-atom / quoted-string / obs-local-part
+	   obs-local-part  = word *("." word)
+	*/
+	if (ctx->parser.data == ctx->parser.end)
+		return 0;
 
-	if (size == 0)
-		return NULL;
+	str_truncate(ctx->str, 0);
+	if ((ret = rfc822_parse_dot_atom(&ctx->parser, ctx->str)) < 0)
+		return -1;
 
-	first_addr = NULL;
-	next_addr = &first_addr;
+	ctx->addr.mailbox = p_strdup(ctx->pool, str_c(ctx->str));
+	return ret;
+}
 
-	/* 1) name <@route:mailbox at domain>, ...
-	   2) mailbox at domain (name), ...
-	   3) group: name <box at domain>, box2 at domain2 (name2), ... ;, ...
+static int parse_domain(struct message_address_parser_context *ctx)
+{
+	int ret;
 
-	   ENVELOPE wants groups to be stored like (NIL, NIL, group, NIL),
-	   ..., (NIL, NIL, NIL, NIL)
-	*/
-	tok = message_tokenize_init(data, size, NULL, NULL);
-	message_tokenize_skip_comments(tok, FALSE);
-        message_tokenize_dot_token(tok, FALSE);
+	str_truncate(ctx->str, 0);
+	if ((ret = rfc822_parse_domain(&ctx->parser, ctx->str)) < 0)
+		return -1;
 
-	if (!pool->datastack_pool)
-		t_push();
-	mailbox = t_str_new(128);
-	domain = t_str_new(256);
-	route = t_str_new(128);
-	name = t_str_new(256);
-	comment = t_str_new(256);
+	ctx->addr.domain = p_strdup(ctx->pool, str_c(ctx->str));
+	return ret;
+}
 
-	ingroup = FALSE; len = 0;
-	stop_tokens = stop_tokens_init;
+static int parse_domain_list(struct message_address_parser_context *ctx)
+{
+	int ret;
 
-	if (max_addresses == 0)
-		max_addresses = (unsigned int)-1;
+	/* obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) */
+	str_truncate(ctx->str, 0);
+	for (;;) {
+		if (ctx->parser.data == ctx->parser.end)
+			return 0;
 
-	next_phrase = mailbox; stop = FALSE;
-	while (!stop && max_addresses > 0) {
-		if (next_phrase == name && str_len(name) > 0) {
-			/* continuing previously started name,
-			   separate it from us with space */
-			str_append_c(name, ' ');
-			len = str_len(name);
-		} else {
-			len = 0;
-		}
-		message_tokenize_get_string(tok, next_phrase, comment,
-					    stop_tokens);
+		if (*ctx->parser.data != '@')
+			break;
 
-		if (next_phrase == name && len > 0 && len == str_len(name)) {
-			/* nothing appeneded, remove the space */
-			str_truncate(name, len-1);
+		if (str_len(ctx->str) > 0)
+			str_append_c(ctx->str, ',');
+
+		str_append_c(ctx->str, '@');
+		if ((ret = rfc822_parse_domain(&ctx->parser, ctx->str)) <= 0)
+			return ret;
+
+		while (rfc822_skip_lwsp(&ctx->parser) &&
+		       *ctx->parser.data == ',')
+			ctx->parser.data++;
+	}
+	ctx->addr.route = p_strdup(ctx->pool, str_c(ctx->str));
+	return 1;
+}
+
+static int parse_angle_addr(struct message_address_parser_context *ctx)
+{
+	int ret;
+
+	/* "<" [ "@" route ":" ] local-part "@" domain ">" */
+	i_assert(*ctx->parser.data == '<');
+	ctx->parser.data++;
+
+	if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0)
+		return ret;
+
+	if (*ctx->parser.data == '@') {
+		if (parse_domain_list(ctx) <= 0 || *ctx->parser.data != ':') {
+			ctx->addr.route = p_strdup(ctx->pool, "INVALID_ROUTE");
+			return -1;
 		}
+		ctx->parser.data++;
+		if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0)
+			return ret;
+	}
 
-		token = message_tokenize_get(tok);
-		switch (token) {
-		case TOKEN_LAST:
-		case ',':
-		case ';':
-			/* end of address */
-			if (str_len(mailbox) > 0 || str_len(domain) > 0 ||
-			    str_len(route) > 0 || str_len(name) > 0) {
-				addr = new_address(pool, &next_addr);
-				max_addresses--;
-				addr->mailbox = p_strdup(pool, str_c(mailbox));
-				addr->domain = str_len(domain) == 0 ? NULL :
-					p_strdup(pool, str_c(domain));
-				addr->route = str_len(route) == 0 ? NULL :
-					p_strdup(pool, str_c(route));
-				addr->name = next_phrase == name ?
-					p_strdup_empty(pool, str_c(name)) :
-					p_strdup_empty(pool, str_c(comment));
-			}
+	if ((ret = parse_local_part(ctx)) <= 0)
+		return ret;
+	if (*ctx->parser.data == '@') {
+		if ((ret = parse_domain(ctx)) <= 0)
+			return ret;
+	}
 
-			if (ingroup && token == ';') {
-				/* end of group - add end of group marker */
-				ingroup = FALSE;
-				(void)new_address(pool, &next_addr);
-				max_addresses--;
-			}
+	if (*ctx->parser.data != '>')
+		return -1;
+	ctx->parser.data++;
 
-			if (token == TOKEN_LAST) {
-				stop = TRUE;
-				break;
-			}
+	return rfc822_skip_lwsp(&ctx->parser);
+}
 
-			stop_tokens = ingroup ? stop_tokens_group :
-				stop_tokens_init;
+static int parse_name_addr(struct message_address_parser_context *ctx)
+{
+	/*
+	   name-addr       = [display-name] angle-addr
+	   display-name    = phrase
+	*/
+	str_truncate(ctx->str, 0);
+	if (rfc822_parse_phrase(&ctx->parser, ctx->str) <= 0 ||
+	    *ctx->parser.data != '<')
+		return -1;
 
-			str_truncate(mailbox, 0);
-			str_truncate(domain, 0);
-			str_truncate(route, 0);
-			str_truncate(name, 0);
-			str_truncate(comment, 0);
+	ctx->addr.name = p_strdup(ctx->pool, str_c(ctx->str));
+	if (parse_angle_addr(ctx) < 0) {
+		/* broken */
+		ctx->addr.domain = p_strdup(ctx->pool, "SYNTAX_ERROR");
+	}
+	return ctx->parser.data != ctx->parser.end;
+}
 
-			next_phrase = mailbox;
-			break;
-		case '@':
-			/* domain part comes next */
-			next_phrase = domain;
-			stop_tokens = ingroup ? stop_tokens_domain_group :
-				stop_tokens_domain;
-			break;
-		case '<':
-			/* route-addr */
+static int parse_addr_spec(struct message_address_parser_context *ctx)
+{
+	/* addr-spec       = local-part "@" domain */
+	int ret;
 
-			/* mailbox/domain name so far has actually
-			   been the real name */
-			str_append_str(name, mailbox);
-			str_truncate(mailbox, 0);
+	str_truncate(ctx->parser.last_comment, 0);
 
-			if (str_len(domain) > 0) {
-                                str_append_c(name, '@');
-				str_append_str(name, domain);
-				str_truncate(domain, 0);
-			}
+	if ((ret = parse_local_part(ctx)) < 0)
+		return ret;
+	if (ret > 0 && *ctx->parser.data == '@') {
+		if ((ret = parse_domain(ctx)) < 0)
+			return ret;
+	}
 
-			/* mailbox */
-			message_tokenize_get_string(tok, mailbox, NULL,
-						    stop_tokens_addr_mailbox);
+	if (str_len(ctx->parser.last_comment) > 0) {
+		ctx->addr.name =
+			p_strdup(ctx->pool, str_c(ctx->parser.last_comment));
+	}
+	return ret;
+}
 
-			if (message_tokenize_get(tok) == '@' &&
-			    str_len(mailbox) == 0) {
-				/* route is given */
-				message_tokenize_get_string(tok,
-					route, NULL, stop_tokens_addr_route);
+static int parse_mailbox(struct message_address_parser_context *ctx)
+{
+	const unsigned char *start;
+	int ret;
 
-				if (message_tokenize_get(tok) == ':') {
-					/* mailbox comes next */
-					message_tokenize_get_string(tok,
-						mailbox, NULL,
-						stop_tokens_addr_mailbox);
-				}
-			}
+	if (ctx->parser.data == ctx->parser.end)
+		return 0;
 
-			if (message_tokenize_get(tok) == '@') {
-				/* domain */
-				message_tokenize_get_string(tok,
-					domain, NULL, stop_tokens_addr_domain);
-			}
+	/* mailbox         = name-addr / addr-spec */
+	start = ctx->parser.data;
+	if ((ret = parse_name_addr(ctx)) < 0) {
+		/* nope, should be addr-spec */
+		ctx->parser.data = start;
+		if ((ret = parse_addr_spec(ctx)) < 0)
+			return -1;
+	}
 
-			token = message_tokenize_get(tok);
-			i_assert(token == '>' || token == TOKEN_LAST);
+	if (ctx->addr.mailbox == NULL)
+		ctx->addr.domain = p_strdup(ctx->pool, "MISSING_MAILBOX");
+	if (ctx->addr.domain == NULL)
+		ctx->addr.domain = p_strdup(ctx->pool, "MISSING_DOMAIN");
+	add_address(ctx);
 
-			next_phrase = name;
-			stop_tokens = ingroup ? stop_tokens_post_addr_group :
-				stop_tokens_post_addr;
+	return ret;
+}
+
+static int parse_mailbox_list(struct message_address_parser_context *ctx)
+{
+	int ret;
+
+	/* mailbox-list    = (mailbox *("," mailbox)) / obs-mbox-list */
+	while ((ret = parse_mailbox(ctx)) > 0) {
+		if (*ctx->parser.data != ',')
 			break;
-		case ':':
-			/* beginning of group */
-			addr = new_address(pool, &next_addr);
-			max_addresses--;
-			addr->mailbox = p_strdup(pool, str_c(mailbox));
+		ctx->parser.data++;
+		rfc822_skip_lwsp(&ctx->parser);
+	}
+	return ret;
+}
 
-			str_truncate(mailbox, 0);
-			str_truncate(comment, 0);
+static int parse_group(struct message_address_parser_context *ctx)
+{
+	/*
+	   group           = display-name ":" [mailbox-list / CFWS] ";" [CFWS]
+	   display-name    = phrase
+	*/
+	str_truncate(ctx->str, 0);
+	if (rfc822_parse_phrase(&ctx->parser, ctx->str) <= 0 ||
+	    *ctx->parser.data != ':')
+		return -1;
 
-			ingroup = TRUE;
-			stop_tokens = stop_tokens_group;
+	/* from now on don't return -1 even if there are problems, so that
+	   the caller knows this is a group */
+	ctx->parser.data++;
+	(void)rfc822_skip_lwsp(&ctx->parser);
+
+	ctx->addr.mailbox = p_strdup(ctx->pool, str_c(ctx->str));
+	add_address(ctx);
+
+	if (parse_mailbox_list(ctx) > 0) {
+		if (*ctx->parser.data == ';') {
+			ctx->parser.data++;
+			(void)rfc822_skip_lwsp(&ctx->parser);
+		}
+	}
+
+	add_address(ctx);
+	return 1;
+}
+
+static int parse_address(struct message_address_parser_context *ctx)
+{
+	const unsigned char *start;
+	int ret;
+
+	/* address         = mailbox / group */
+	start = ctx->parser.data;
+	if ((ret = parse_group(ctx)) < 0) {
+		/* not a group, try mailbox */
+		ctx->parser.data = start;
+		ret = parse_mailbox(ctx);
+	}
+
+	return ret;
+}
+
+static void parse_address_list(struct message_address_parser_context *ctx,
+			       unsigned int max_addresses)
+{
+	/* address-list    = (address *("," address)) / obs-addr-list */
+	while (max_addresses-- > 0 && parse_address(ctx) > 0) {
+		if (*ctx->parser.data != ',')
 			break;
-		default:
-			i_unreached();
+		ctx->parser.data++;
+		if (rfc822_skip_lwsp(&ctx->parser) <= 0)
 			break;
-		}
 	}
+}
 
-	if (ingroup)
-		(void)new_address(pool, &next_addr);
+struct message_address *
+message_address_parse(pool_t pool, const unsigned char *data, size_t size,
+		      unsigned int max_addresses)
+{
+	struct message_address_parser_context ctx;
 
-	if (!pool->datastack_pool)
-		t_pop();
-	message_tokenize_deinit(tok);
+	t_push();
+	memset(&ctx, 0, sizeof(ctx));
 
-	return first_addr;
+	rfc822_parser_init(&ctx.parser, data, size, t_str_new(128));
+	ctx.pool = pool;
+	ctx.str = t_str_new(128);
+
+	(void)parse_address_list(&ctx, max_addresses);
+	t_pop();
+	return ctx.first_addr;
 }
 
 void message_address_write(string_t *str, const struct message_address *addr)
@@ -234,14 +295,14 @@
 		else
 			str_append(str, ", ");
 
-		if (addr->mailbox == NULL && addr->domain == NULL) {
+		if (addr->domain == NULL) {
 			if (!in_group) {
-				if (addr->name != NULL)
-					str_append(str, addr->name);
+				if (addr->mailbox != NULL)
+					str_append(str, addr->mailbox);
 				str_append(str, ": ");
 				first = TRUE;
 			} else {
-				i_assert(addr->name == NULL);
+				i_assert(addr->mailbox == NULL);
 
 				/* cut out the ", " */
 				str_truncate(str, str_len(str)-2);

Index: message-address.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-address.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- message-address.h	20 Jan 2003 14:52:51 -0000	1.2
+++ message-address.h	6 Jan 2005 18:14:28 -0000	1.3
@@ -2,7 +2,7 @@
 #define __MESSAGE_ADDRESS_H
 
 /* group: ... ; will be stored like:
-   {name = "group", NULL, NULL, NULL}, ..., {NULL, NULL, NULL, NULL}
+   {name = NULL, NULL, "group", NULL}, ..., {NULL, NULL, NULL, NULL}
 */
 struct message_address {
 	struct message_address *next;
@@ -10,9 +10,7 @@
 	const char *name, *route, *mailbox, *domain;
 };
 
-/* data and size are passed directly to message_tokenize_init(), so (size_t)-1
-   can be given if data is \0 terminated. If there's more than max_addresses,
-   the rest are skipped. Setting max_addresses to 0 disables this. */
+/* Parse message addresses from given data. */
 struct message_address *
 message_address_parse(pool_t pool, const unsigned char *data, size_t size,
 		      unsigned int max_addresses);

--- NEW FILE: rfc822-parser.c ---
/* Copyright (C) 2005 Timo Sirainen */

#include "lib.h"
#include "str.h"
#include "rfc822-parser.h"

/*
   atext        =       ALPHA / DIGIT / ; Any character except controls,
                        "!" / "#" /     ;  SP, and specials.
                        "$" / "%" /     ;  Used for atoms
                        "&" / "'" /
                        "*" / "+" /
                        "-" / "/" /
                        "=" / "?" /
                        "^" / "_" /
                        "`" / "{" /
                        "|" / "}" /
                        "~"
*/

/* Classification table indexed by byte value:
     0 = not allowed in an atom (controls, SP, DEL and the specials)
     1 = atext punctuation
     2 = ASCII letters and digits; every 8-bit byte (128-255) is also
         marked 2, so non-ASCII data is accepted inside atoms
   The table is only ever read, so it is const. */
static const unsigned char atext_chars[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
	0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, /* 32-47 */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 0, 1, /* 48-63 */
	0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */

	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/* Non-zero when c may be part of an atom. The argument is converted to
   unsigned char first so that a negative plain char cannot index before
   the start of the table. */
#define IS_ATEXT(c) \
	(atext_chars[(int)(unsigned char)(c)] != 0)

/* Set up ctx to parse the size bytes beginning at data. last_comment is
   stored as given; rfc822_skip_comment() only writes to it when it is
   non-NULL. */
void rfc822_parser_init(struct rfc822_parser_context *ctx,
			const unsigned char *data, size_t size,
			string_t *last_comment)
{
	const unsigned char *const data_end = data + size;

	/* wipe the whole context before filling in the fields */
	memset(ctx, 0, sizeof(*ctx));

	ctx->last_comment = last_comment;
	ctx->end = data_end;
	ctx->data = data;
}

/* Skip a (possibly nested) comment. ctx->data must point at the opening
   '('. When ctx->last_comment is set it is emptied first and then filled
   with the comment text: outermost parentheses are left out and the
   backslash of each quoted-pair is dropped.

   Returns 1 if data remains after the closing ')', 0 if the ')' was the
   last byte, and -1 if the comment is unterminated or the input ends
   right after a backslash. */
int rfc822_skip_comment(struct rfc822_parser_context *ctx)
{
	const unsigned char *seg;
	int depth = 1;

	i_assert(*ctx->data == '(');

	if (ctx->last_comment != NULL)
		str_truncate(ctx->last_comment, 0);

	ctx->data++;
	seg = ctx->data;
	while (ctx->data != ctx->end) {
		if (*ctx->data == '(') {
			depth++;
		} else if (*ctx->data == ')') {
			depth--;
			if (depth == 0) {
				/* outermost ')': flush the pending text */
				if (ctx->last_comment != NULL) {
					str_append_n(ctx->last_comment, seg,
						     ctx->data - seg);
				}
				ctx->data++;
				return ctx->data != ctx->end;
			}
		} else if (*ctx->data == '\\') {
			/* flush what precedes the backslash, then restart
			   the pending segment at the escaped byte */
			if (ctx->last_comment != NULL) {
				str_append_n(ctx->last_comment, seg,
					     ctx->data - seg);
			}
			ctx->data++;
			seg = ctx->data;
			if (ctx->data == ctx->end)
				return -1;
		}
		ctx->data++;
	}

	/* ran out of input before the closing ')' */
	return -1;
}

/* Advance ctx->data past any run of spaces, tabs, CRs, LFs and comments.
   Stops at the first other byte, at the end of the data, or when
   rfc822_skip_comment() reports a broken comment.

   Returns non-zero if there is still data left, 0 if the end was reached. */
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
{
	while (ctx->data != ctx->end) {
		unsigned char c = *ctx->data;

		if (c == '(') {
			if (rfc822_skip_comment(ctx) < 0)
				break;
		} else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
			ctx->data++;
		} else {
			break;
		}
	}
	return ctx->data != ctx->end;
}

/* Append the run of atext bytes at ctx->data (which may be empty) to str.

   Returns 0 if the run extends to the end of the data. Otherwise the
   white space and comments following the atom are skipped and the result
   of rfc822_skip_lwsp() is returned. */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *begin = ctx->data;

	/*
	   atom            = [CFWS] 1*atext [CFWS]
	   atext           =
	     ; Any character except controls, SP, and specials.
	*/
	while (ctx->data != ctx->end && IS_ATEXT(*ctx->data))
		ctx->data++;

	str_append_n(str, begin, ctx->data - begin);
	if (ctx->data == ctx->end)
		return 0;

	return rfc822_skip_lwsp(ctx);
}

/* Parse a dot-atom at ctx->data and append it to str. White space and
   comments around each '.' are skipped and are not copied to str.

   Returns 1 if parsing stopped at a byte that is neither atext nor '.',
   and 0 if the end of the data was reached. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;
	int ret;

	/*
	   dot-atom        = [CFWS] dot-atom-text [CFWS]
	   dot-atom-text   = 1*atext *("." 1*atext)

	   atext           =
	     ; Any character except controls, SP, and specials.

	   For RFC-822 compatibility allow LWSP around '.'
	*/
	start = ctx->data;
	while (ctx->data != ctx->end) {
		if (IS_ATEXT(*ctx->data)) {
			ctx->data++;
			continue;
		}

		str_append_n(str, start, ctx->data - start);

		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
			return ret;

		if (*ctx->data != '.')
			return 1;

		ctx->data++;
		str_append_c(str, '.');

		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
			return ret;

		/* Don't advance here: the byte after the '.' has to go
		   through the atext check above. Stepping over it blindly
		   would let e.g. the '@' in "a.@b" become part of the
		   atom. */
		start = ctx->data;
	}

	str_append_n(str, start, ctx->data - start);
	return 0;
}

/* Parse a quoted-string and append its unquoted contents to str.
   ctx->data must point at the opening '"'. The surrounding quotes and the
   backslash of every quoted-pair are left out of str.

   On success the closing '"' is consumed, following white space and
   comments are skipped and the result of rfc822_skip_lwsp() is returned.
   Returns -1 if the closing '"' is missing or the data ends right after a
   backslash. */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;

	i_assert(*ctx->data == '"');
	ctx->data++;

	for (start = ctx->data; ctx->data != ctx->end; ctx->data++) {
		if (*ctx->data == '"') {
			str_append_n(str, start, ctx->data - start);
			/* step over the closing quote; otherwise the caller
			   would see it as the start of another
			   quoted-string */
			ctx->data++;
			return rfc822_skip_lwsp(ctx);
		}

		if (*ctx->data != '\\')
			continue;

		ctx->data++;
		if (ctx->data == ctx->end)
			return -1;

		/* copy the text before the backslash (hence the -1) and
		   restart at the escaped byte, which is then taken
		   literally */
		str_append_n(str, start, ctx->data - start - 1);
		start = ctx->data;
	}

	/* missing '"' */
	return -1;
}

/* Parse a phrase: a sequence of atoms and quoted-strings. The words are
   appended to str separated by single spaces.

   Returns 0 immediately if there is no data to parse. Returns the word
   parser's result if it is <= 0 (end of data or error). Otherwise returns
   the result of rfc822_skip_lwsp() once a byte that cannot start another
   word is found. */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	/* nothing to look at; dereferencing ctx->data here would read past
	   the end of the buffer */
	if (ctx->data == ctx->end)
		return 0;

	for (;;) {
		if (*ctx->data == '"')
			ret = rfc822_parse_quoted_string(ctx, str);
		else
			ret = rfc822_parse_atom(ctx, str);
		if (ret <= 0)
			return ret;

		/* ret > 0 means data is left, so this read is in bounds */
		if (!IS_ATEXT(*ctx->data) && *ctx->data != '"')
			break;
		str_append_c(str, ' ');
	}
	return rfc822_skip_lwsp(ctx);
}

/* Parse a domain-literal and append it, including the enclosing brackets
   and any backslashes, to str. ctx->data must point at the '['.

   On success the white space and comments after the ']' are skipped and
   the result of rfc822_skip_lwsp() is returned (non-zero if data remains,
   0 at the end of the data). Returns -1 if the closing ']' is missing. */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;

	/*
	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
	   dcontent        = dtext / quoted-pair
	   dtext           = NO-WS-CTL /     ; Non white space controls
			     %d33-90 /       ; The rest of the US-ASCII
			     %d94-126        ;  characters not including "[",
					     ;  "]", or "\"
	*/
	i_assert(*ctx->data == '[');

	for (start = ctx->data; ctx->data != ctx->end; ctx->data++) {
		if (*ctx->data == '\\') {
			/* quoted-pair: step onto the escaped byte so that
			   an escaped ']' doesn't end the literal */
			ctx->data++;
			if (ctx->data == ctx->end)
				break;
		} else if (*ctx->data == ']') {
			ctx->data++;
			str_append_n(str, start, ctx->data - start);
			/* the grammar allows CFWS after ']'; skip it the
			   same way the dot-atom parser does */
			return rfc822_skip_lwsp(ctx);
		}
	}

	/* missing ']' */
	return -1;
}

/* Parse the domain that follows an '@' and append it to str. ctx->data
   must point at the '@'. A '[' selects the domain-literal parser, anything
   else is handed to the dot-atom parser.

   Returns -1 if nothing follows the '@' or the sub-parser fails, otherwise
   non-zero if data remains and 0 if the end of the data was reached. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	/*
	   domain          = dot-atom / domain-literal / obs-domain
	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
	   obs-domain      = atom *("." atom)
	*/
	i_assert(*ctx->data == '@');
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	ret = *ctx->data == '[' ?
		rfc822_parse_domain_literal(ctx, str) :
		rfc822_parse_dot_atom(ctx, str);
	if (ret < 0)
		return -1;

	return ctx->data != ctx->end;
}

--- NEW FILE: rfc822-parser.h ---
#ifndef __RFC822_PARSER_H
#define __RFC822_PARSER_H

/* Parser state shared by all the rfc822_*() functions. */
struct rfc822_parser_context {
	/* current read position; the parse functions advance it */
	const unsigned char *data;
	/* one past the last byte of the input */
	const unsigned char *end;
	/* if non-NULL, rfc822_skip_comment() stores the text of the comment
	   it skipped here */
	string_t *last_comment;
};

/* Initialize ctx to parse size bytes starting at data. last_comment may be
   NULL; otherwise rfc822_skip_comment() stores comment text in it. */
void rfc822_parser_init(struct rfc822_parser_context *ctx,
			const unsigned char *data, size_t size,
			string_t *last_comment);

/* Skip a comment; ctx->data must point at '('. Returns -1 if the comment
   is unterminated, otherwise non-zero if data remains after it and 0 if
   the end of the data was reached. */
int rfc822_skip_comment(struct rfc822_parser_context *ctx);
/* Skip spaces, tabs, CRs, LFs and comments. Returns non-zero if data
   remains, 0 if the end of the data was reached. */
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx);
/* Append the atext run at ctx->data to str. Returns 0 if it reached the
   end of the data, otherwise the result of rfc822_skip_lwsp(). */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str);
/* Append a dot-atom to str; white space around '.' is allowed and dropped.
   Returns 1 if data remains, 0 if the end of the data was reached. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str);
/* Append the contents of a quoted-string to str; ctx->data must point at
   the opening '"'. Returns -1 if the closing '"' is missing or the data
   ends right after a backslash, otherwise the result of
   rfc822_skip_lwsp(). */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx,
			       string_t *str);
/* Append a phrase (atoms and quoted-strings, joined with single spaces) to
   str. Returns <= 0 if a word parser did, otherwise the result of
   rfc822_skip_lwsp(). */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str);
/* Append the domain following '@' to str; ctx->data must point at the '@'.
   Returns -1 on error, otherwise non-zero if data remains and 0 if the end
   of the data was reached. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str);

#endif



More information about the dovecot-cvs mailing list