[dovecot-cvs] dovecot/src/lib-mail Makefile.am, 1.12, 1.13 message-header-parser.c, NONE, 1.1 message-header-parser.h, NONE, 1.1 message-parser.c, 1.71, 1.72 message-parser.h, 1.30, 1.31

tss-movial at dovecot.org tss-movial at dovecot.org
Mon May 8 11:46:21 EEST 2006


Update of /var/lib/cvs/dovecot/src/lib-mail
In directory talvi:/tmp/cvs-serv713/lib-mail

Modified Files:
	Makefile.am message-parser.c message-parser.h 
Added Files:
	message-header-parser.c message-header-parser.h 
Log Message:
Rewrote the message bodystructure parser to allow parsing from non-blocking streams. Also did a couple of API changes and cleanups.



Index: Makefile.am
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/Makefile.am,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -d -r1.12 -r1.13
--- Makefile.am	5 Jun 2005 20:39:53 -0000	1.12
+++ Makefile.am	8 May 2006 08:46:19 -0000	1.13
@@ -11,6 +11,7 @@
 	message-content-parser.c \
 	message-date.c \
 	message-header-decode.c \
+	message-header-parser.c \
 	message-header-search.c \
 	message-parser.c \
 	message-part-serialize.c \
@@ -27,6 +28,7 @@
 	message-content-parser.h \
 	message-date.h \
 	message-header-decode.h \
+	message-header-parser.h \
 	message-header-search.h \
 	message-parser.h \
 	message-part-serialize.h \

--- NEW FILE: message-header-parser.c ---
/* Copyright (C) 2002-2006 Timo Sirainen */

#include "lib.h"
#include "buffer.h"
#include "istream.h"
#include "str.h"
#include "message-size.h"
#include "message-header-parser.h"

/* State of one header parsing run, created by message_parse_header_init()
   and released by message_parse_header_deinit(). */
struct message_header_parser_ctx {
	/* the line handed out to the caller by message_parse_header_next();
	   it is reused (overwritten) on every call */
	struct message_header_line line;

	/* stream the header is read from */
	struct istream *input;
	/* optional size counters updated while parsing; may be NULL */
	struct message_size *hdr_size;

	/* holds a copy of the current header name; line.name points into it */
	string_t *name;
	/* created on demand; collects the value of a multiline header when
	   the caller has asked for full_value */
	buffer_t *value_buf;
	/* number of input bytes consumed by the previously returned line.
	   They are skipped at the start of the next call (or in deinit) */
	size_t skip;

	/* copy of the skip_initial_lwsp parameter given to init */
	unsigned int skip_initial_lwsp:1;
	/* set while discarding a line whose header name didn't fit into the
	   input buffer */
	unsigned int skip_line:1;
	/* set once a NUL byte has been seen in the scanned header data */
	unsigned int has_nuls:1;
};

/* Create a header parser reading from input. If hdr_size is non-NULL it is
   cleared here and then updated by message_parse_header_next() as lines are
   parsed. skip_initial_lwsp is stored and controls how whitespace after the
   ':' is treated. The returned context must be released with
   message_parse_header_deinit(). */
struct message_header_parser_ctx *
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
			  bool skip_initial_lwsp)
{
	struct message_header_parser_ctx *parser;

	/* the caller's counters always start from zero */
	if (hdr_size != NULL)
		memset(hdr_size, 0, sizeof(*hdr_size));

	/* fields not set below are left as i_new() returned them; the rest of
	   this file expects them to start out as zero/NULL */
	parser = i_new(struct message_header_parser_ctx, 1);
	parser->name = str_new(default_pool, 128);
	parser->skip_initial_lwsp = skip_initial_lwsp;
	parser->hdr_size = hdr_size;
	parser->input = input;
	return parser;
}

/* Destroy the parser and set the caller's pointer to NULL. Input bytes that
   belong to the last returned line but haven't been skipped yet are skipped
   from the stream before the context is freed. */
void message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
{
	struct message_header_parser_ctx *parser = *_ctx;

	*_ctx = NULL;

	/* consume what the last message_parse_header_next() left pending */
	i_stream_skip(parser->input, parser->skip);

	str_free(&parser->name);
	/* value_buf exists only if some multiline value was collected */
	if (parser->value_buf != NULL)
		buffer_free(parser->value_buf);
	i_free(parser);
}

/* Read the next header line from the parser's input stream.

   Returns 1 and sets *hdr_r to the parser-owned line structure when a line
   (or the final "end of headers" line, eoh=TRUE) was read. Returns 0 when the
   stream had no more data available yet but isn't at EOF (non-blocking
   input); call again later. Returns -1 when the end of headers was already
   returned earlier, or when the stream returned -1 with nothing buffered.
   *hdr_r is NULL whenever the return value isn't 1.

   The returned value/middle pointers refer to the stream's buffer, and the
   bytes of the returned line are skipped from the stream only at the start
   of the next call (or in deinit). If hdr_size was given to init, its
   counters are updated for the consumed data. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r)
{
	struct message_header_line *line = &ctx->line;
	const unsigned char *msg;
	size_t i, size, startpos, colon_pos, parse_size;
	int ret;
	bool last_no_newline;

	*hdr_r = NULL;
	if (line->eoh)
		return -1;

	/* skip the bytes of the line that was returned by the previous call */
	if (ctx->skip > 0) {
		i_stream_skip(ctx->input, ctx->skip);
		ctx->skip = 0;
	}

	/* colon_pos == UINT_MAX means "':' not found yet" */
	startpos = 0; colon_pos = UINT_MAX;

	last_no_newline = line->no_newline;
	line->no_newline = FALSE;

	if (line->continues) {
		if (line->use_full_value && !line->continued) {
			/* save the first line */
			if (ctx->value_buf != NULL)
				buffer_set_used_size(ctx->value_buf, 0);
			else {
				ctx->value_buf =
					buffer_create_dynamic(default_pool,
							      4096);
			}
			buffer_append(ctx->value_buf,
				      line->value, line->value_len);
		}

		line->continued = TRUE;
		line->continues = FALSE;
		/* continuation lines have no name, so don't look for ':' */
		colon_pos = 0;
	} else {
		/* new header line */
		line->continued = FALSE;
		line->name_offset = ctx->input->v_offset;
	}

	for (;;) {
		ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);

		if (ret > 0) {
			/* we want to know one byte in advance to find out
			   if it's multiline header */
			parse_size = size-1;
		} else {
			parse_size = size;
		}

		/* no new data was received and everything buffered has
		   already been scanned */
		if (ret <= 0 && startpos == size) {
			if (ret == -1) {
				if (startpos > 0) {
					/* header ended unexpectedly. */
					line->no_newline = TRUE;
					ctx->skip = startpos;
					break;
				}
				/* error / EOF with no bytes */
				return -1;
			}
			if (ret == 0 && !ctx->input->eof) {
				/* stream is nonblocking - need more data */
				return 0;
			}

			if (msg[0] == '\n' ||
			    (msg[0] == '\r' && size > 1 && msg[1] == '\n')) {
				/* end of headers - this mostly happens just
				   with mbox where headers are read separately
				   from body */
				size = 0;
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->lines++;
				if (msg[0] == '\r')
					ctx->skip = 2;
				else {
					ctx->skip = 1;
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}
				break;
			}

			/* a) line is larger than input buffer
			   b) header ended unexpectedly */
			if (colon_pos == UINT_MAX && ret == -2 &&
			    !line->continued) {
				/* header name is huge. just skip it. */
				if (msg[size-1] == '\r')
					size--;

				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->physical_size += size;
					ctx->hdr_size->virtual_size += size;
				}
				i_stream_skip(ctx->input, size);
				ctx->skip_line = TRUE;
				startpos = 0;
				continue;
			}

			if (ret == -2) {
				/* go back to last LWSP if found. */
				size_t min_pos =
					!line->continued ? colon_pos : 0;
				for (i = size-1; i > min_pos; i--) {
					if (IS_LWSP(msg[i])) {
						size = i;
						break;
					}
				}

				/* the rest of this line is returned by the
				   next call as a continuation */
				line->continues = TRUE;
			}
			line->no_newline = TRUE;
			ctx->skip = size;
			break;
		}

		/* find ':' */
		if (colon_pos == UINT_MAX) {
			for (i = startpos; i < parse_size; i++) {
				/* cheap pre-check: ':', '\n' and '\0' are
				   all <= ':' */
				if (msg[i] <= ':') {
					if (msg[i] == ':') {
						colon_pos = i;
						line->full_value_offset =
							ctx->input->v_offset +
							i + 1;
						break;
					}
					if (msg[i] == '\n') {
						/* end of headers, or error */
						break;
					}

					if (msg[i] == '\0')
						ctx->has_nuls = TRUE;
				}
			}
		}

		/* find '\n' */
		for (i = startpos; i < parse_size; i++) {
			if (msg[i] <= '\n') {
				if (msg[i] == '\n')
					break;
				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		}

		if (i < parse_size) {
			/* got a line */
			if (ctx->skip_line) {
				/* skipping a huge line */
				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->physical_size += i;
					ctx->hdr_size->virtual_size += i;
					if (i == 0 || msg[i-1] != '\r') {
						/* missing CR */
						ctx->hdr_size->virtual_size++;
					}
				}
				/* NOTE(review): only the bytes before the
				   '\n' are skipped, so the next iteration
				   sees the '\n' at offset 0, which the code
				   below appears to handle like an empty
				   line - confirm this is intended. */
				i_stream_skip(ctx->input, i);
				startpos = 0;
				ctx->skip_line = FALSE;
				continue;
			}
			/* LWSP at the beginning of the next line means that
			   this header continues there */
			line->continues = i+1 < size && IS_LWSP(msg[i+1]);

			if (ctx->hdr_size != NULL)
				ctx->hdr_size->lines++;
			if (i == 0 || msg[i-1] != '\r') {
				/* missing CR */
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->virtual_size++;
				size = i;
			} else {
				size = i-1;
			}

			/* size is now the line length without the line
			   terminator; skip includes the '\n' */
			ctx->skip = i+1;
			break;
		}

		/* no '\n' yet, continue scanning from here with more data */
		startpos = i;
	}

	if (size == 0) {
		/* end of headers */
		line->eoh = TRUE;
		line->name_len = line->value_len = line->full_value_len = 0;
		line->name = ""; line->value = line->full_value = NULL;
		line->middle = NULL; line->middle_len = 0;
	} else if (line->continued) {
		/* the whole line is value; name is left as it was */
		line->value = msg;
		line->value_len = size;
		line->middle = NULL;
		line->middle_len = 0;
	} else if (colon_pos == UINT_MAX) {
		/* missing ':', assume the whole line is name */
		line->value = NULL;
		line->value_len = 0;

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, size);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);

		line->middle = NULL;
		line->middle_len = 0;
	} else {
		size_t pos;

		line->value = msg + colon_pos+1;
		line->value_len = size - colon_pos - 1;
		if (ctx->skip_initial_lwsp) {
			/* get value. skip all LWSP after ':'. Note that
			   RFC2822 doesn't say we should, but history behind
			   it..

			   Exception to this is if the value consists only of
			   LWSP, then skip only the one LWSP after ':'. */
			for (pos = 0; pos < line->value_len; pos++) {
				/* test each byte in turn. testing only
				   value[0] would make any value starting
				   with LWSP look like it was all LWSP */
				if (!IS_LWSP(line->value[pos]))
					break;
			}

			if (pos == line->value_len) {
				/* everything was LWSP */
				if (line->value_len > 0 &&
				    IS_LWSP(line->value[0]))
					pos = 1;
			}
		} else {
			/* skip at most the one LWSP following ':' */
			pos = line->value_len > 0 &&
				IS_LWSP(line->value[0]) ? 1 : 0;
		}

		line->value += pos;
		line->value_len -= pos;
		line->full_value_offset += pos;

		/* get name, skip LWSP before ':' */
		while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
			colon_pos--;

		/* middle = everything between the name and the value */
		line->middle = msg + colon_pos;
		line->middle_len = (size_t)(line->value - line->middle);

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, colon_pos);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);
	}

	if (!line->continued) {
		/* first header line, set full_value = value */
		line->full_value = line->value;
		line->full_value_len = line->value_len;
	} else if (line->use_full_value) {
		/* continue saving the full value. the '\n' separator is
		   left out if the previous piece didn't end with a newline */
		if (!last_no_newline)
			buffer_append_c(ctx->value_buf, '\n');
		buffer_append(ctx->value_buf, line->value, line->value_len);
		line->full_value = buffer_get_data(ctx->value_buf,
						   &line->full_value_len);
	} else {
		/* we didn't want full_value, and this is a continued line. */
		line->full_value = NULL;
		line->full_value_len = 0;
	}

	/* always reset it */
	line->use_full_value = FALSE;

	if (ctx->hdr_size != NULL) {
		ctx->hdr_size->physical_size += ctx->skip;
		ctx->hdr_size->virtual_size += ctx->skip;
	}

	*hdr_r = line;
	return 1;
}

bool message_parse_header_has_nuls(struct message_header_parser_ctx *ctx)
{
	return ctx->has_nuls;
}

/* Parse the whole header from input, calling callback for each line that
   the parser returns. The parser is created with skip_initial_lwsp=TRUE.
   hdr_size is passed through to the parser and may be NULL. After the parser
   has been deinitialized the callback is called one final time with a NULL
   line. */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *parser;
	struct message_header_line *line;
	int ret;

	parser = message_parse_header_init(input, hdr_size, TRUE);
	for (;;) {
		ret = message_parse_header_next(parser, &line);
		if (ret <= 0)
			break;
		callback(line, context);
	}
	/* 0 would mean "need more data", which this function can't handle */
	i_assert(ret != 0);
	message_parse_header_deinit(&parser);

	/* deinit has now done the final stream skipping, so it's safe to
	   report the end */
	callback(NULL, context);
}

--- NEW FILE: message-header-parser.h ---
#ifndef __MESSAGE_HEADER_PARSER_H
#define __MESSAGE_HEADER_PARSER_H

/* Non-zero if c is a space or a horizontal tab. The argument is evaluated
   twice, so don't pass an expression with side effects. */
#define IS_LWSP(c) \
	((c) == ' ' || (c) == '\t')

struct message_size;
struct message_header_parser_ctx;

/* One header line as returned by message_parse_header_next(). The structure
   is owned by the parser and is overwritten by the next call. */
struct message_header_line {
	/* header name without the LWSP preceding ':'. "" for the end of
	   headers line. If the line has no ':', the whole line is the name */
	const char *name;
	size_t name_len;

	/* value on this physical line only, without the line terminator.
	   NULL if the line had no ':' or this is the end of headers line */
	const unsigned char *value;
	size_t value_len;

	/* for the first line of a header this equals value. For continued
	   lines it's the value collected so far (lines joined with '\n') if
	   use_full_value was set, otherwise NULL */
	const unsigned char *full_value;
	size_t full_value_len;

	/* the bytes between name and value (LWSP before ':', the ':' and the
	   LWSP that was skipped after it). NULL for continued lines, lines
	   without ':' and the end of headers line */
	const unsigned char *middle;
	size_t middle_len;

	/* stream offsets where the header line and its value begin */
	uoff_t name_offset, full_value_offset;

	unsigned int continues:1; /* multiline header, continues in next line */
	unsigned int continued:1; /* this line continues the previous line */
	unsigned int eoh:1; /* "end of headers" line */
	unsigned int no_newline:1; /* no \n after this line */
	/* set if you want full_value. the parser clears this before every
	   return, so it has to be set again for each line */
	unsigned int use_full_value:1;
};

/* Callback for message_parse_header(). Called for each line the parser
   returns, and then once with hdr = NULL at the end of headers. context is
   the pointer that was given to message_parse_header(). */
typedef void message_header_callback_t(struct message_header_line *hdr,
				       void *context);

/* Create a header parser for the given stream. If hdr_size is non-NULL, it's
   zeroed and then updated as the header is parsed.

   skip_initial_lwsp controls if we should skip LWSP after "header: ".
   Note that there may not be the single whitespace after "header:", and that
   "header : " is also possible. These two conditions can't be determined from
   struct message_header_line. */
struct message_header_parser_ctx *
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
			  bool skip_initial_lwsp);
/* Free the parser and set *ctx to NULL. The stream is first skipped past the
   last line that was returned. */
void message_parse_header_deinit(struct message_header_parser_ctx **ctx);

/* Read and return next header line. Returns 1 if header is returned, 0 if
   input stream is non-blocking and more data needs to be read, -1 when all is
   done or error occurred (see stream's error status).

   *hdr_r is set to NULL unless 1 is returned. The returned line is owned by
   the parser and is overwritten by the next call. The end of headers is
   returned as a line with eoh set; calls after that return -1. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r);

/* Returns TRUE if the parser has seen NUL characters. */
bool message_parse_header_has_nuls(struct message_header_parser_ctx *ctx);

/* Read and parse the header from the given stream, calling callback for each
   header line and finally once with hdr = NULL. LWSP after "header:" is
   skipped (skip_initial_lwsp=TRUE). The parser must never report that it
   needs more data (this is asserted), so the stream has to be able to
   deliver the whole header without returning "try again later". */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  message_header_callback_t *callback, void *context);

#endif

Index: message-parser.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-parser.c,v
retrieving revision 1.71
retrieving revision 1.72
diff -u -d -r1.71 -r1.72
--- message-parser.c	14 Jan 2006 18:47:35 -0000	1.71
+++ message-parser.c	8 May 2006 08:46:19 -0000	1.72
@@ -1,13 +1,14 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2006 Timo Sirainen */
 
 #include "lib.h"
-#include "buffer.h"
 #include "istream.h"
-#include "str.h"
 #include "strescape.h"
 #include "message-content-parser.h"
 #include "message-parser.h"
-#include "message-size.h"
[...1348 lines suppressed...]
-		buffer_append(ctx->value_buf, line->value, line->value_len);
-		line->full_value = buffer_get_data(ctx->value_buf,
-						   &line->full_value_len);
-	} else {
-		/* we didn't want full_value, and this is a continued line. */
-		line->full_value = NULL;
-		line->full_value_len = 0;
-	}
-
-	/* always reset it */
-	line->use_full_value = FALSE;
-
-	if (ctx->hdr_size != NULL) {
-		ctx->hdr_size->physical_size += ctx->skip;
-		ctx->hdr_size->virtual_size += ctx->skip;
-	}
-
-	*hdr_r = line;
-	return 1;
-}

Index: message-parser.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-parser.h,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- message-parser.h	14 Jan 2006 18:47:35 -0000	1.30
+++ message-parser.h	8 May 2006 08:46:19 -0000	1.31
@@ -1,11 +1,9 @@
 #ifndef __MESSAGE_PARSER_H
 #define __MESSAGE_PARSER_H
 
+#include "message-header-parser.h"
 #include "message-size.h"
 
-#define IS_LWSP(c) \
-	((c) == ' ' || (c) == '\t')
-
 enum message_part_flags {
 	MESSAGE_PART_FLAG_MULTIPART		= 0x01,
 	MESSAGE_PART_FLAG_MULTIPART_DIGEST	= 0x02,
@@ -38,47 +36,20 @@
 };
 
 struct message_parser_ctx;
-struct message_header_parser_ctx;
-
-struct message_header_line {
-	const char *name;
-	size_t name_len;
-
-	const unsigned char *value;
-	size_t value_len;
-
-	const unsigned char *full_value;
-	size_t full_value_len;
 
-	const unsigned char *middle;
-	size_t middle_len;
-
-	uoff_t name_offset, full_value_offset;
+struct message_block {
+	/* non-NULL if a header line was read */
+	struct message_header_line *hdr;
 
-	unsigned int continues:1; /* multiline header, continues in next line */
-	unsigned int continued:1; /* multiline header, continues */
-	unsigned int eoh:1; /* "end of headers" line */
-	unsigned int no_newline:1; /* no \n after this line */
-	unsigned int use_full_value:1; /* set if you want full_value */
+	/* hdr = NULL, size = 0 block returned at the end of headers */
+	const unsigned char *data;
+	size_t size;
 };
 
 /* called once with hdr = NULL at the end of headers */
-typedef void message_header_callback_t(struct message_part *part,
-				       struct message_header_line *hdr,
-				       void *context);
-/* called once with size = 0 at the end of message part */
-typedef void message_body_callback_t(struct message_part *part,
-				     const unsigned char *data, size_t size,
-				     void *context);
-
-/* callback is called for each field in message header. */
-void message_parse_from_parts(struct message_part *part, struct istream *input,
-			      message_header_callback_t *callback,
-			      void *context);
-void message_parse_header(struct message_part *part, struct istream *input,
-			  struct message_size *hdr_size,
-			  message_header_callback_t *callback, void *context);
-
+typedef void message_part_header_callback_t(struct message_part *part,
+					    struct message_header_line *hdr,
+					    void *context);
 
 /* Initialize message parser. part_spool specifies where struct message_parts
    are allocated from. */
@@ -86,32 +57,27 @@
 message_parser_init(pool_t part_pool, struct istream *input);
 struct message_part *message_parser_deinit(struct message_parser_ctx **ctx);
 
+/* Read the next block of a message. Returns 1 if block is returned, 0 if
+   input stream is non-blocking and more data needs to be read, -1 when all is
+   done or error occurred (see stream's error status). */
+int message_parser_parse_next_block(struct message_parser_ctx *ctx,
+				    struct message_block *block_r);
+
 /* Read and parse header. */
 void message_parser_parse_header(struct message_parser_ctx *ctx,
 				 struct message_size *hdr_size,
-				 message_header_callback_t *callback,
+				 message_part_header_callback_t *callback,
 				 void *context);
 /* Read and parse body. If message is a MIME multipart or message/rfc822
    message, hdr_callback is called for all headers. body_callback is called
    for the body content. */
 void message_parser_parse_body(struct message_parser_ctx *ctx,
-			       message_header_callback_t *hdr_callback,
-			       message_body_callback_t *body_callback,
+			       message_part_header_callback_t *hdr_callback,
 			       void *context);
 
-/* skip_initial_lwsp controls if we should skip LWSP after "header: ".
-   Note that there may not be the single whitespace after "header:", and that
-   "header : " is also possible. These two conditions can't be determined from
-   struct message_header_line. */
-struct message_header_parser_ctx *
-message_parse_header_init(struct istream *input, struct message_size *hdr_size,
-			 bool skip_initial_lwsp);
-void message_parse_header_deinit(struct message_header_parser_ctx **ctx);
-
-/* Read and return next header line. Returns 1 if header is returned, 0 if
-   input stream is non-blocking and more data needs to be read, -1 when all is
-   done or error occurred (see stream's error status). */
-int message_parse_header_next(struct message_header_parser_ctx *ctx,
-			      struct message_header_line **hdr_r);
+/* callback is called for each field in message header. */
+void message_parse_from_parts(struct message_part *part, struct istream *input,
+			      message_part_header_callback_t *callback,
+			      void *context);
 
 #endif



More information about the dovecot-cvs mailing list