[dovecot-cvs] dovecot/src/lib-mail message-parser.c,1.46,1.47 message-parser.h,1.22,1.23

cras at procontrol.fi cras at procontrol.fi
Thu Aug 21 04:04:13 EEST 2003


Update of /home/cvs/dovecot/src/lib-mail
In directory danu:/tmp/cvs-serv9890/lib-mail

Modified Files:
	message-parser.c message-parser.h 
Log Message:
Message parsing can now be done in two parts - header and body. We're now
more eager about parsing the MIME structure of a message: it's done whenever
the body has to be fully read (e.g. for rfc822.size).



Index: message-parser.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.c,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -d -r1.46 -r1.47
--- message-parser.c	20 Aug 2003 01:41:30 -0000	1.46
+++ message-parser.c	21 Aug 2003 00:04:11 -0000	1.47
@@ -17,15 +17,17 @@
 	size_t len;
 };
 
-struct parser_context {
-	pool_t pool;
-	struct message_part *part;
+struct message_parser_ctx {
+	pool_t parser_pool, part_pool;
+	struct istream *input;
+	struct message_part *parts, *part;
 
 	char *last_boundary;
 	char *last_content_type;
 	struct message_boundary *boundaries;
 
 	message_header_callback_t *callback;
+	message_body_callback_t *body_callback;
 	void *context;
 };
 
@@ -42,16 +44,19 @@
 	int has_nuls;
 };
 
+static void
+message_parse_part_header(struct message_parser_ctx *parser_ctx);
+
 static struct message_part *
-message_parse_part(struct istream *input,
-		   struct parser_context *parser_ctx);
+message_parse_part_body(struct message_parser_ctx *parser_ctx);
 
 static struct message_part *
-message_parse_body(struct istream *input, struct message_boundary *boundaries,
-		   struct message_size *body_size, int *has_nuls);
+message_parse_body(struct message_parser_ctx *parser_ctx,
+		   struct message_boundary *boundaries,
+		   struct message_size *msg_size, int *has_nuls);
 
 static struct message_part *
-message_skip_boundary(struct istream *input,
+message_skip_boundary(struct message_parser_ctx *parser_ctx,
 		      struct message_boundary *boundaries,
 		      struct message_size *boundary_size, int *has_nuls);
 
@@ -92,14 +97,14 @@
 static void parse_content_type(const unsigned char *value, size_t value_len,
 			       void *context)
 {
-	struct parser_context *parser_ctx = context;
+	struct message_parser_ctx *parser_ctx = context;
 	const char *str;
 
 	if (parser_ctx->last_content_type != NULL || value_len == 0)
 		return;
 
 	str = parser_ctx->last_content_type =
-		p_strndup(parser_ctx->pool, value, value_len);
+		p_strndup(parser_ctx->parser_pool, value, value_len);
 
 	if (strcasecmp(str, "message/rfc822") == 0)
 		parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
@@ -121,7 +126,7 @@
 			 const unsigned char *value, size_t value_len,
 			 int value_quoted, void *context)
 {
-	struct parser_context *parser_ctx = context;
+	struct message_parser_ctx *parser_ctx = context;
 
 	if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
 	    name_len != 8 || memcasecmp(name, "boundary", 8) != 0)
@@ -129,22 +134,21 @@
 
 	if (parser_ctx->last_boundary == NULL) {
 		parser_ctx->last_boundary =
-			p_strndup(parser_ctx->pool, value, value_len);
+			p_strndup(parser_ctx->parser_pool, value, value_len);
 		if (value_quoted)
 			str_unescape(parser_ctx->last_boundary);
 	}
 }
 
 static struct message_part *
-message_parse_multipart(struct istream *input,
-			struct parser_context *parser_ctx)
+message_parse_multipart(struct message_parser_ctx *parser_ctx)
 {
 	struct message_part *parent_part, *next_part, *part;
 	struct message_boundary *b;
 	int has_nuls;
 
 	/* multipart message. add new boundary */
-	b = t_new(struct message_boundary, 1);
+	b = p_new(parser_ctx->parser_pool, struct message_boundary, 1);
 	b->part = parser_ctx->part;
 	b->boundary = parser_ctx->last_boundary;
 	b->len = strlen(b->boundary);
@@ -158,7 +162,7 @@
 
 	/* skip the data before the first boundary */
 	parent_part = parser_ctx->part;
-	next_part = message_skip_boundary(input, parser_ctx->boundaries,
+	next_part = message_skip_boundary(parser_ctx, parser_ctx->boundaries,
 					  &parent_part->body_size, &has_nuls);
 	if (has_nuls)
 		parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
@@ -166,12 +170,13 @@
 	/* now, parse the parts */
 	while (next_part == parent_part) {
 		/* new child */
-		part = message_part_append(parser_ctx->pool, parent_part);
+		part = message_part_append(parser_ctx->part_pool, parent_part);
 		if ((parent_part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
 			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
 
-                parser_ctx->part = part;
-		next_part = message_parse_part(input, parser_ctx);
+		parser_ctx->part = part;
+                message_parse_part_header(parser_ctx);
+		next_part = message_parse_part_body(parser_ctx);
 
 		if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
 			/* it also belongs to parent */
@@ -185,7 +190,8 @@
 			break;
 
 		/* skip the boundary */
-		next_part = message_skip_boundary(input, parser_ctx->boundaries,
+		next_part = message_skip_boundary(parser_ctx,
+						  parser_ctx->boundaries,
 						  &parent_part->body_size,
 						  &has_nuls);
 		if (has_nuls)
@@ -201,27 +207,22 @@
 #define MUTEX_FLAGS \
 	(MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
 
-static struct message_part *
-message_parse_part(struct istream *input, struct parser_context *parser_ctx)
+static void message_parse_part_header(struct message_parser_ctx *parser_ctx)
 {
+	struct message_part *part = parser_ctx->part;
 	struct message_header_parser_ctx *hdr_ctx;
 	struct message_header_line *hdr;
-	struct message_part *next_part, *part;
-	uoff_t hdr_size;
-	int has_nuls;
 
-	hdr_ctx = message_parse_header_init(input,
-					    &parser_ctx->part->header_size);
+	hdr_ctx = message_parse_header_init(parser_ctx->input,
+					    &part->header_size);
 	while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) {
 		/* call the user-defined header parser */
-		if (parser_ctx->callback != NULL) {
-			parser_ctx->callback(parser_ctx->part, hdr,
-					     parser_ctx->context);
-		}
+		if (parser_ctx->callback != NULL)
+			parser_ctx->callback(part, hdr, parser_ctx->context);
 
 		if (!hdr->eoh && strcasecmp(hdr->name, "Mime-Version") == 0) {
 			/* it's MIME. Content-* headers are valid */
-			parser_ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
 		}
 
 		if (!hdr->eoh && strcasecmp(hdr->name, "Content-Type") == 0) {
@@ -238,63 +239,66 @@
 		}
 	}
 
-	if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+	if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
 		/* It's not MIME. Reset everything we found from
 		   Content-Type. */
-		parser_ctx->part->flags = 0;
+		part->flags = 0;
                 parser_ctx->last_boundary = NULL;
 		parser_ctx->last_content_type = NULL;
 	}
-	if (parser_ctx->callback != NULL) {
-		parser_ctx->callback(parser_ctx->part, NULL,
-				     parser_ctx->context);
-	}
+	if (parser_ctx->callback != NULL)
+		parser_ctx->callback(part, NULL, parser_ctx->context);
 	if (hdr_ctx->has_nuls)
-		parser_ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+		part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
 	message_parse_header_deinit(hdr_ctx);
 
-	i_assert((parser_ctx->part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+	i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+}
 
-	/* update message position/size */
-	hdr_size = parser_ctx->part->header_size.physical_size;
+static struct message_part *
+message_parse_part_body(struct message_parser_ctx *parser_ctx)
+{
+	struct message_part *part = parser_ctx->part;
+        struct message_part *next_part;
+	int has_nuls;
 
 	if (parser_ctx->last_boundary != NULL)
-		return message_parse_multipart(input, parser_ctx);
+		return message_parse_multipart(parser_ctx);
 
 	if (parser_ctx->last_content_type == NULL) {
-		if (parser_ctx->part->parent != NULL &&
-		    (parser_ctx->part->parent->flags &
+		if (part->parent != NULL &&
+		    (part->parent->flags &
 		     MESSAGE_PART_FLAG_MULTIPART_DIGEST)) {
 			/* when there's no content-type specified and we're
 			   below multipart/digest, the assume message/rfc822
 			   content-type */
-			parser_ctx->part->flags |=
-				MESSAGE_PART_FLAG_MESSAGE_RFC822;
+			part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
 		} else {
 			/* otherwise we default to text/plain */
-			parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
+			part->flags |= MESSAGE_PART_FLAG_TEXT;
 		}
 	}
 
 	parser_ctx->last_boundary = NULL;
         parser_ctx->last_content_type = NULL;
 
-	if (parser_ctx->part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
+	if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
 		/* message/rfc822 part - the message body begins with
 		   headers again, this works pretty much the same as
 		   a single multipart/mixed item */
-		part = message_part_append(parser_ctx->pool, parser_ctx->part);
+		part = message_part_append(parser_ctx->part_pool, part);
 
 		parser_ctx->part = part;
-		next_part = message_parse_part(input, parser_ctx);
+		message_parse_part_header(parser_ctx);
+		next_part = message_parse_part_body(parser_ctx);
 		parser_ctx->part = part->parent;
 
 		/* our body size is the size of header+body in message/rfc822 */
 		message_size_add_part(&part->parent->body_size, part);
 	} else {
 		/* normal message, read until the next boundary */
-		part = parser_ctx->part;
-		next_part = message_parse_body(input, parser_ctx->boundaries,
+		next_part = message_parse_body(parser_ctx,
+					       parser_ctx->boundaries,
 					       &part->body_size, &has_nuls);
 		if (has_nuls)
 			part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
@@ -495,18 +499,19 @@
 }
 
 static struct message_part *
-message_parse_body(struct istream *input, struct message_boundary *boundaries,
+message_parse_body(struct message_parser_ctx *parser_ctx,
+		   struct message_boundary *boundaries,
 		   struct message_size *msg_size, int *has_nuls)
 {
 	struct message_boundary *boundary;
 	struct message_size body_size;
 
 	if (boundaries == NULL) {
-		message_get_body_size(input, &body_size, has_nuls);
+		message_get_body_size(parser_ctx->input, &body_size, has_nuls);
 		message_size_add(msg_size, &body_size);
 		boundary = NULL;
 	} else {
-		boundary = message_find_boundary(input, boundaries,
+		boundary = message_find_boundary(parser_ctx->input, boundaries,
 						 msg_size, FALSE, has_nuls);
 	}
 
@@ -516,7 +521,7 @@
 /* skip data until next boundary is found. if it's end boundary,
    skip the footer as well. */
 static struct message_part *
-message_skip_boundary(struct istream *input,
+message_skip_boundary(struct message_parser_ctx *parser_ctx,
 		      struct message_boundary *boundaries,
 		      struct message_size *boundary_size, int *has_nuls)
 {
@@ -525,61 +530,90 @@
 	size_t size;
 	int end_boundary;
 
-	boundary = message_find_boundary(input, boundaries,
+	boundary = message_find_boundary(parser_ctx->input, boundaries,
 					 boundary_size, TRUE, has_nuls);
 	if (boundary == NULL)
 		return NULL;
 
 	/* now, see if it's end boundary */
 	end_boundary = FALSE;
-	if (i_stream_read_data(input, &msg, &size, 1) > 0)
+	if (i_stream_read_data(parser_ctx->input, &msg, &size, 1) > 0)
 		end_boundary = msg[0] == '-' && msg[1] == '-';
 
 	/* skip the rest of the line */
-	message_skip_line(input, boundary_size, !end_boundary, has_nuls);
+	message_skip_line(parser_ctx->input, boundary_size,
+			  !end_boundary, has_nuls);
 
 	if (end_boundary) {
 		/* skip the footer */
-		return message_parse_body(input, boundary->next,
+		return message_parse_body(parser_ctx, boundary->next,
 					  boundary_size, has_nuls);
 	}
 
 	return boundary == NULL ? NULL : boundary->part;
 }
 
-struct message_part *message_parse(pool_t pool, struct istream *input,
-				   message_header_callback_t *callback,
-				   void *context)
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input)
 {
-	struct message_part *part;
-	struct parser_context parser_ctx;
+	struct message_parser_ctx *ctx;
+	pool_t pool;
 
-	memset(&parser_ctx, 0, sizeof(parser_ctx));
-	parser_ctx.pool = pool;
-	parser_ctx.callback = callback;
-	parser_ctx.context = context;
-	parser_ctx.part = part = p_new(pool, struct message_part, 1);
+	pool = pool_alloconly_create("Message Parser", 1024);
+	ctx = p_new(pool, struct message_parser_ctx, 1);
+	ctx->parser_pool = pool;
+	ctx->part_pool = part_pool;
+	ctx->input = input;
+	ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
+	return ctx;
+}
 
-	message_parse_part(input, &parser_ctx);
-	return part;
+struct message_part *message_parser_deinit(struct message_parser_ctx *ctx)
+{
+	struct message_part *parts = ctx->parts;
+
+	pool_unref(ctx->parser_pool);
+	return parts;
+}
+
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_header_callback_t *callback,
+				 void *context)
+{
+	ctx->callback = callback;
+	ctx->context = context;
+
+	message_parse_part_header(ctx);
+        *hdr_size = ctx->part->header_size;
+}
+
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_header_callback_t *hdr_callback,
+			       message_body_callback_t *body_callback,
+			       void *context)
+{
+	ctx->callback = hdr_callback;
+	ctx->body_callback = body_callback;
+	ctx->context = context;
+
+	message_parse_part_body(ctx);
 }
 
 static void part_parse_headers(struct message_part *part, struct istream *input,
-			       uoff_t start_offset,
 			       message_header_callback_t *callback,
 			       void *context)
 {
 	while (part != NULL) {
 		/* note that we want to parse the header of all
 		   the message parts, multiparts too. */
-		i_assert(part->physical_pos >= input->v_offset - start_offset);
-		i_stream_skip(input, part->physical_pos -
-			      (input->v_offset - start_offset));
+		i_assert(part->physical_pos >= input->v_offset);
+		i_stream_skip(input, part->physical_pos - input->v_offset);
 
 		message_parse_header(part, input, NULL, callback, context);
 		if (part->children != NULL) {
 			part_parse_headers(part->children, input,
-					   start_offset, callback, context);
+					   callback, context);
 		}
 
 		part = part->next;
@@ -590,7 +624,7 @@
 			      message_header_callback_t *callback,
 			      void *context)
 {
-	part_parse_headers(part, input, input->v_offset, callback, context);
+	part_parse_headers(part, input, callback, context);
 }
 
 void message_parse_header(struct message_part *part, struct istream *input,

Index: message-parser.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.h,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -d -r1.22 -r1.23
--- message-parser.h	20 Aug 2003 01:41:30 -0000	1.22
+++ message-parser.h	21 Aug 2003 00:04:11 -0000	1.23
@@ -37,6 +37,7 @@
 	void *context;
 };
 
+struct message_parser_ctx;
 struct message_header_parser_ctx;
 
 struct message_header_line {
@@ -56,21 +57,42 @@
 	unsigned int use_full_value:1; /* set if you want full_value */
 };
 
-/* called once with hdr = NULL at end of headers */
+/* called once with hdr = NULL at the end of headers */
 typedef void message_header_callback_t(struct message_part *part,
 				       struct message_header_line *hdr,
 				       void *context);
+/* called once with size = 0 at the end of message part */
+typedef void message_body_callback_t(struct message_part *part,
+				     const unsigned char *data, size_t size,
+				     void *context);
 
 /* callback is called for each field in message header. */
-struct message_part *message_parse(pool_t pool, struct istream *input,
-				   message_header_callback_t *callback,
-				   void *context);
 void message_parse_from_parts(struct message_part *part, struct istream *input,
 			      message_header_callback_t *callback,
 			      void *context);
 void message_parse_header(struct message_part *part, struct istream *input,
 			  struct message_size *hdr_size,
 			  message_header_callback_t *callback, void *context);
+
+
+/* Initialize message parser. part_pool specifies where struct message_parts
+   are allocated from. */
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input);
+struct message_part *message_parser_deinit(struct message_parser_ctx *ctx);
+
+/* Read and parse header. */
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_header_callback_t *callback,
+				 void *context);
+/* Read and parse body. If message is a MIME multipart or message/rfc822
+   message, hdr_callback is called for all headers. body_callback is called
+   for the body content. */
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_header_callback_t *hdr_callback,
+			       message_body_callback_t *body_callback,
+			       void *context);
 
 struct message_header_parser_ctx *
 message_parse_header_init(struct istream *input, struct message_size *hdr_size);



More information about the dovecot-cvs mailing list