dovecot-2.2-pigeonhole: lib-sieve: Moved message body parsing code from body extension to Sieve core.

pigeonhole at rename-it.nl pigeonhole at rename-it.nl
Fri Nov 13 23:30:39 UTC 2015


details:   http://hg.rename-it.nl/dovecot-2.2-pigeonhole/rev/d1e5a06fc9d7
changeset: 2130:d1e5a06fc9d7
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Sat Nov 14 00:30:30 2015 +0100
description:
lib-sieve: Moved message body parsing code from body extension to Sieve core.
This makes this available for other extensions.

diffstat:

 src/lib-sieve/plugins/body/ext-body-common.c |  582 +--------------------------
 src/lib-sieve/sieve-message.c                |  568 ++++++++++++++++++++++++++-
 src/lib-sieve/sieve-message.h                |   23 +-
 3 files changed, 594 insertions(+), 579 deletions(-)

diffs (truncated from 1277 to 300 lines):

diff -r 334f0ba2fd9b -r d1e5a06fc9d7 src/lib-sieve/plugins/body/ext-body-common.c
--- a/src/lib-sieve/plugins/body/ext-body-common.c	Mon Nov 02 18:56:39 2015 +0100
+++ b/src/lib-sieve/plugins/body/ext-body-common.c	Sat Nov 14 00:30:30 2015 +0100
@@ -7,11 +7,6 @@
 #include "array.h"
 #include "str.h"
 #include "istream.h"
-#include "rfc822-parser.h"
-#include "message-date.h"
-#include "message-parser.h"
-#include "message-decoder.h"
-#include "mail-html2text.h"
 #include "mail-storage.h"
 
 #include "sieve-common.h"
@@ -22,571 +17,6 @@
 
 #include "ext-body-common.h"
 
-struct ext_body_part {
-	const char *content;
-	unsigned long size;
-};
-
-struct ext_body_part_cached {
-	const char *content_type;
-
-	const char *decoded_body;
-	const char *text_body;
-	size_t decoded_body_size;
-	size_t text_body_size;
-
-	bool have_body; /* there's the empty end-of-headers line */
-};
-
-struct ext_body_message_context {
-	pool_t pool;
-	ARRAY(struct ext_body_part_cached) cached_body_parts;
-	ARRAY(struct ext_body_part) return_body_parts;
-	buffer_t *tmp_buffer;
-	buffer_t *raw_body;
-};
-
-static bool _is_wanted_content_type
-(const char * const *wanted_types, const char *content_type)
-{
-	const char *subtype = strchr(content_type, '/');
-	size_t type_len;
-
-	type_len = ( subtype == NULL ? strlen(content_type) :
-		(size_t)(subtype - content_type) );
-
-	i_assert( wanted_types != NULL );
-
-	for (; *wanted_types != NULL; wanted_types++) {
-		const char *wanted_subtype;
-
-		if (**wanted_types == '\0') {
-			/* empty string matches everything */
-			return TRUE;
-		}
-
-		wanted_subtype = strchr(*wanted_types, '/');
-		if (wanted_subtype == NULL) {
-			/* match only main type */
-			if (strlen(*wanted_types) == type_len &&
-			  strncasecmp(*wanted_types, content_type, type_len) == 0)
-				return TRUE;
-		} else {
-			/* match whole type/subtype */
-			if (strcasecmp(*wanted_types, content_type) == 0)
-				return TRUE;
-		}
-	}
-	return FALSE;
-}
-
-static bool _want_multipart_content_type
-(const char * const *wanted_types)
-{
-	for (; *wanted_types != NULL; wanted_types++) {
-		if (**wanted_types == '\0') {
-			/* empty string matches everything */
-			return TRUE;
-		}
-
-		/* match only main type */
-		if ( strncasecmp(*wanted_types, "multipart", 9) == 0 &&
-			( strlen(*wanted_types) == 9 || *(*wanted_types+9) == '/' ) )
-			return TRUE;
-	}
-
-	return FALSE;
-}
-
-
-static bool ext_body_get_return_parts
-(struct ext_body_message_context *ctx, const char * const *wanted_types,
-	bool extract_text)
-{
-	const struct ext_body_part_cached *body_parts;
-	unsigned int i, count;
-	struct ext_body_part *return_part;
-
-	/* Check whether any body parts are cached already */
-	body_parts = array_get(&ctx->cached_body_parts, &count);
-	if ( count == 0 )
-		return FALSE;
-
-	/* Clear result array */
-	array_clear(&ctx->return_body_parts);
-
-	/* Fill result array with requested content_types */
-	for (i = 0; i < count; i++) {
-		if (!body_parts[i].have_body) {
-			/* Part has no body; according to RFC this MUST not match to anything and
-			 * therefore it is not included in the result.
-			 */
-			continue;
-		}
-
-		/* Skip content types that are not requested */
-		if (!_is_wanted_content_type(wanted_types, body_parts[i].content_type))
-			continue;
-
-		/* Add new item to the result */
-		return_part = array_append_space(&ctx->return_body_parts);
-
-		/* Depending on whether a decoded body part is requested, the appropriate
-		 * cache item is read. If it is missing, this function fails and the cache
-		 * needs to be completed by ext_body_parts_add_missing().
-		 */
-		if (extract_text) {
-			if (body_parts[i].text_body == NULL)
-				return FALSE;
-			return_part->content = body_parts[i].text_body;
-			return_part->size = body_parts[i].text_body_size;
-		} else {
-			if (body_parts[i].decoded_body == NULL)
-				return FALSE;
-			return_part->content = body_parts[i].decoded_body;
-			return_part->size = body_parts[i].decoded_body_size;			
-		}
-	}
-
-	return TRUE;
-}
-
-static void ext_body_part_save
-(struct ext_body_message_context *ctx,
-	struct ext_body_part_cached *body_part, bool extract_text)
-{
-	buffer_t *buf = ctx->tmp_buffer;
-	buffer_t *text_buf = NULL;
-	char *part_data;
-	size_t part_size;
-
-	/* Add terminating NUL to the body part buffer */
-	buffer_append_c(buf, '\0');
-
-	if ( extract_text ) {
-		if ( mail_html2text_content_type_match
-			(body_part->content_type) ) {
-			struct mail_html2text *html2text;
-
-			text_buf = buffer_create_dynamic(default_pool, 4096);
-
-			/* Remove HTML markup */
-			html2text = mail_html2text_init(0);
-			mail_html2text_more(html2text, buf->data, buf->used, text_buf);
-			mail_html2text_deinit(&html2text);
-	
-			buf = text_buf;
-		}
-	}
-
-	part_data = p_malloc(ctx->pool, buf->used);
-	memcpy(part_data, buf->data, buf->used);
-	part_size = buf->used - 1;
-
-	if ( text_buf != NULL)
-		buffer_free(&text_buf);
-
-	/* Depending on whether the part is processed into text, store message
-	 * body in the appropriate cache location.
-	 */
-	if ( !extract_text ) {
-		body_part->decoded_body = part_data;
-		body_part->decoded_body_size = part_size;
-	} else {
-		body_part->text_body = part_data;
-		body_part->text_body_size = part_size;
-	}
-
-	/* Clear buffer */
-	buffer_set_used_size(ctx->tmp_buffer, 0);
-}
-
-static const char *_parse_content_type(const struct message_header_line *hdr)
-{
-	struct rfc822_parser_context parser;
-	string_t *content_type;
-
-	/* Initialize parsing */
-	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
-	(void)rfc822_skip_lwsp(&parser);
-
-	/* Parse content type */
-	content_type = t_str_new(64);
-	if (rfc822_parse_content_type(&parser, content_type) < 0)
-		return "";
-
-	/* Content-type value must end here, otherwise it is invalid after all */
-	(void)rfc822_skip_lwsp(&parser);
-	if ( parser.data != parser.end && *parser.data != ';' )
-		return "";
-
-	/* Success */
-	return str_c(content_type);
-}
-
-/* ext_body_parts_add_missing():
- *   Add requested message body parts to the cache that are missing.
- */
-static int ext_body_parts_add_missing
-(const struct sieve_runtime_env *renv,
-	struct ext_body_message_context *ctx,
-	const char *const *content_types, bool extract_text)
-{
-	buffer_t *buf = ctx->tmp_buffer;
-	struct mail *mail = sieve_message_get_mail(renv->msgctx);
-	struct ext_body_part_cached *body_part = NULL, *header_part = NULL;
-	struct message_parser_ctx *parser;
-	struct message_decoder_context *decoder;
-	struct message_block block, decoded;
-	struct message_part *parts, *prev_part = NULL;
-	ARRAY(struct message_part *) part_index;
-	struct istream *input;
-	unsigned int idx = 0;
-	bool save_body = FALSE, want_multipart, have_all;
-	int ret;
-
-	/* First check whether any are missing */
-	if (ext_body_get_return_parts(ctx, content_types, extract_text)) {
-		/* Cache hit; all are present */
-		return SIEVE_EXEC_OK;
-	}
-
-	/* Get the message stream */
-	if ( mail_get_stream(mail, NULL, NULL, &input) < 0 ) {
-		return sieve_runtime_mail_error(renv, mail,
-			"body test: failed to read input message");
-	}
-	if (mail_get_parts(mail, &parts) < 0) {
-		return sieve_runtime_mail_error(renv, mail,
-			"body test: failed to parse input message");
-	}
-
-	if ( (want_multipart=_want_multipart_content_type(content_types)) ) {
-		t_array_init(&part_index, 8);
-	}
-
-	buffer_set_used_size(buf, 0);
-
-	/* Initialize body decoder */
-	decoder = message_decoder_init(NULL, 0);
-
-	//parser = message_parser_init_from_parts(parts, input, 0,
-		//MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS);
-	parser = message_parser_init(ctx->pool, input, 0,
-		MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS);
-	while ( (ret = message_parser_parse_next_block(parser, &block)) > 0 ) {
-
-		if ( block.part != prev_part ) {
-			bool message_rfc822 = FALSE;
-
-			/* Save previous body part */
-			if ( body_part != NULL ) {
-				/* Treat message/rfc822 separately; headers become content */
-				if ( block.part->parent == prev_part &&
-					strcmp(body_part->content_type, "message/rfc822") == 0 ) {
-					message_rfc822 = TRUE;
-				} else {
-					if ( save_body ) {
-						ext_body_part_save(ctx, body_part, extract_text);
-					}
-				}
-			}
-
-			/* Start processing next */
-			body_part = array_idx_modifiable(&ctx->cached_body_parts, idx);
-			body_part->content_type = "text/plain";
-
-			/* Check whether this is the epilogue block of a wanted multipart part */
-			if ( want_multipart ) {
-				array_idx_set(&part_index, idx, &block.part);
-
-				if ( prev_part != NULL && prev_part->next != block.part &&
-					block.part->parent != prev_part ) {
-					struct message_part *const *iparts;


More information about the dovecot-cvs mailing list