dovecot-2.2: message parser: Added MESSAGE_PARSER_FLAG_INCLUDE_M...

dovecot at dovecot.org dovecot at dovecot.org
Mon Feb 13 00:40:08 EET 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/e7854f8d7213
changeset: 14147:e7854f8d7213
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Feb 13 00:40:03 2012 +0200
description:
message parser: Added MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS.
Patch by Stephan Bosch.

diffstat:

 src/lib-mail/message-parser.c |  249 ++++++++++++++++++++++++++++++++++++++---
 src/lib-mail/message-parser.h |    4 +-
 2 files changed, 233 insertions(+), 20 deletions(-)

diffs (truncated from 345 to 300 lines):

diff -r e456e1bce47f -r e7854f8d7213 src/lib-mail/message-parser.c
--- a/src/lib-mail/message-parser.c	Mon Feb 13 00:29:55 2012 +0200
+++ b/src/lib-mail/message-parser.c	Mon Feb 13 00:40:03 2012 +0200
@@ -54,6 +54,8 @@
 				       struct message_block *block_r);
 static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
 				  struct message_block *block_r);
+static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx,
+					 struct message_block *block_r);
 static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx,
 					    struct message_block *block_r);
 
@@ -271,9 +273,17 @@
 	block_r->size = (ptr - block_r->data) + 1;
 	parse_body_add_block(ctx, block_r);
 
-	/* a new MIME part begins */
-	ctx->parse_next_block = parse_next_mime_header_init;
-	return 1;
+	if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
+		/* epilogue */
+		if (ctx->boundaries != NULL)
+			ctx->parse_next_block = parse_next_body_to_boundary;
+		else
+			ctx->parse_next_block = parse_next_body_to_eof;
+	} else {
+		/* a new MIME part begins */
+		ctx->parse_next_block = parse_next_mime_header_init;
+	}
+	return ctx->parse_next_block(ctx, block_r);
 }
 
 static int parse_part_finish(struct message_parser_ctx *ctx,
@@ -293,28 +303,24 @@
 	if (boundary->epilogue_found) {
 		/* this boundary isn't needed anymore */
 		ctx->boundaries = boundary->next;
-
-		if (ctx->boundaries != NULL)
-			ctx->parse_next_block = parse_next_body_to_boundary;
-		else
-			ctx->parse_next_block = parse_next_body_to_eof;
-		return ctx->parse_next_block(ctx, block_r);
+	} else {
+		/* forget about the boundaries we possibly skipped */
+		ctx->boundaries = boundary;
 	}
 
-	/* forget about the boundaries we possibly skipped */
-	ctx->boundaries = boundary;
-
 	/* the boundary itself should already be in buffer. add that. */
 	block_r->data = i_stream_get_data(ctx->input, &block_r->size);
 	i_assert(block_r->size >= ctx->skip + 2 + boundary->len +
 		 (first_line ? 0 : 1));
 	block_r->data += ctx->skip;
-	/* [\n]--<boundary> */
-	block_r->size = (first_line ? 0 : 1) + 2 + boundary->len;
+	/* [\n]--<boundary>[--] */
+	block_r->size = (first_line ? 0 : 1) + 2 + boundary->len +
+		(boundary->epilogue_found ? 2 : 0);
 	parse_body_add_block(ctx, block_r);
 
 	ctx->parse_next_block = parse_next_body_skip_boundary_line;
-	return 1;
+
+	return ctx->parse_next_block(ctx, block_r);
 }
 
 static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
@@ -392,6 +398,11 @@
 	}
 	if (block_r->size != 0) {
 		parse_body_add_block(ctx, block_r);
+
+		if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
+			return 0;
+
 		return 1;
 	}
 	return ret <= 0 ? ret :
@@ -408,6 +419,11 @@
 		return ret;
 
 	parse_body_add_block(ctx, block_r);
+
+	if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
+		return 0;
+
 	return 1;
 }
 
@@ -583,6 +599,24 @@
 			ctx->part = ctx->part->next;
 			break;
 		}
+
+		/* parse epilogue of multipart parent if requested */
+		if (ctx->part->parent != NULL &&
+		    (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
+		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) {
+			/* check for presence of epilogue */
+			uoff_t part_end = ctx->part->physical_pos +
+				ctx->part->header_size.physical_size +
+				ctx->part->body_size.physical_size;
+			uoff_t parent_end = ctx->part->parent->physical_pos +
+				ctx->part->parent->header_size.physical_size +
+				ctx->part->parent->body_size.physical_size;
+
+			if (parent_end > part_end) {
+				ctx->parse_next_block = preparsed_parse_epilogue_init;
+				break;
+			}
+		}
 		ctx->part = ctx->part->parent;
 	}
 	if (ctx->part == NULL)
@@ -599,6 +633,17 @@
 	return ctx->parse_next_block(ctx, block_r);
 }
 
+static int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx,
+					   struct message_block *block_r)
+{
+	i_stream_skip(ctx->input, ctx->skip);
+	ctx->skip = 0;
+
+	ctx->parse_next_block = preparsed_parse_next_header_init;
+	ctx->part = ctx->part->children;
+	return ctx->parse_next_block(ctx, block_r);
+}
+
 static int preparsed_parse_body_more(struct message_parser_ctx *ctx,
 				     struct message_block *block_r)
 {
@@ -619,6 +664,143 @@
 	return 1;
 }
 
+static int preparsed_parse_prologue_more(struct message_parser_ctx *ctx,
+					 struct message_block *block_r)
+{
+	uoff_t end_offset = ctx->part->children->physical_pos;
+	uoff_t boundary_min_start;
+	const unsigned char *cur;
+	bool full;
+	int ret;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	if (ctx->input->v_offset + block_r->size >= end_offset) {
+		/* we've got the full prologue: clip off the initial boundary */
+		block_r->size = end_offset - ctx->input->v_offset;
+		cur = block_r->data + block_r->size - 1;
+
+		/* [\r]\n--boundary[\r]\n */ 
+		if (block_r->size < 5 || *cur != '\n') {
+			ctx->broken = TRUE;
+			return -1;
+		}
+		
+		cur--;
+		if (*cur == '\r') cur--;
+
+		/* find newline just before boundary */
+		for (; cur >= block_r->data; cur--) {
+			if (*cur == '\n') break;
+		}
+
+		if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') {
+			ctx->broken = TRUE;
+			return -1;
+		}
+
+		if (cur != block_r->data && cur[-1] == '\r') cur--;
+
+		/* clip boundary */
+		block_r->size = cur - block_r->data;			
+
+		ctx->parse_next_block = preparsed_parse_prologue_finish;
+		ctx->skip = block_r->size;
+		return 1;
+	}
+		
+	/* retain enough data in the stream buffer to contain initial boundary */
+	if (end_offset > BOUNDARY_END_MAX_LEN)
+		boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN;
+	else
+		boundary_min_start = 0;
+
+	if (ctx->input->v_offset + block_r->size >= boundary_min_start) {
+		if (boundary_min_start <= ctx->input->v_offset)
+			return 0;
+		block_r->size = boundary_min_start - ctx->input->v_offset;
+	}
+	ctx->skip = block_r->size;
+	return 1;
+}
+
+static int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx,
+					 struct message_block *block_r)
+{
+	uoff_t end_offset = ctx->part->physical_pos +
+		ctx->part->header_size.physical_size +
+		ctx->part->body_size.physical_size;
+	bool full;
+	int ret;
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	if (ctx->input->v_offset + block_r->size >= end_offset) {
+		block_r->size = end_offset - ctx->input->v_offset;
+		ctx->parse_next_block = preparsed_parse_body_finish;
+	}
+	ctx->skip = block_r->size;
+	return 1;
+}
+
+static int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx,
+					     struct message_block *block_r)
+{
+	uoff_t end_offset = ctx->part->physical_pos +
+		ctx->part->header_size.physical_size +
+		ctx->part->body_size.physical_size;
+	const unsigned char *data, *cur;
+	size_t size;
+	bool full;
+	int ret;
+
+	if (end_offset - ctx->input->v_offset < 7) {
+		ctx->broken = TRUE;
+		return -1;
+	}
+
+	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
+		return ret;
+
+	/* [\r]\n--boundary--[\r]\n */
+	if (block_r->size < 7) {
+		ctx->want_count = 7;
+		return 0;
+	}
+
+	data = block_r->data;
+	size = block_r->size;
+	cur = data;
+
+	if (*cur == '\r') cur++;
+
+	if (cur[0] != '\n' || cur[1] != '-' || data[2] != '-') {
+		ctx->broken = TRUE;
+		return -1;
+	}
+
+	/* find the end of the line */
+	cur += 3;
+	if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) {
+		if (end_offset < ctx->input->v_offset + size) {
+			ctx->broken = TRUE;
+			return -1;
+		} else if (ctx->input->v_offset + size < end_offset &&
+			   size < BOUNDARY_END_MAX_LEN &&
+			   !ctx->input->eof && !full) {
+			ctx->want_count = BOUNDARY_END_MAX_LEN;
+			return 0;
+		}
+	}
+
+	block_r->size = 0;
+	ctx->parse_next_block = preparsed_parse_epilogue_more;
+	ctx->skip = cur - data + 1;
+	return 0;
+}
+
 static int preparsed_parse_body_init(struct message_parser_ctx *ctx,
 				     struct message_block *block_r)
 {
@@ -632,16 +814,45 @@
 	}
 	i_stream_skip(ctx->input, offset - ctx->input->v_offset);
 
-	ctx->parse_next_block = preparsed_parse_body_more;
-	return preparsed_parse_body_more(ctx, block_r);
+	if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0)
+		ctx->parse_next_block = preparsed_parse_body_more;
+	else
+		ctx->parse_next_block = preparsed_parse_prologue_more;
+	return ctx->parse_next_block(ctx, block_r);
+}
+
+static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx,
+					 struct message_block *block_r)
+{
+	uoff_t offset = ctx->part->physical_pos +
+		ctx->part->header_size.physical_size +
+		ctx->part->body_size.physical_size;


More information about the dovecot-cvs mailing list