[dovecot-cvs] dovecot/src/lib-mail message-header-decode.c,NONE,1.1 message-header-decode.h,NONE,1.1 Makefile.am,1.5,1.6 message-header-search.c,1.6,1.7 quoted-printable.c,1.2,1.3

Mon Dec 16 05:26:57 EET 2002

Update of /home/cvs/dovecot/src/lib-mail
In directory danu:/tmp/cvs-serv19656/lib-mail

Modified Files:
	Makefile.am message-header-search.c quoted-printable.c 
Added Files:
	message-header-decode.c message-header-decode.h 
Log Message:
Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
a separate function.

--- NEW FILE: message-header-decode.c ---
/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "base64.h"
#include "buffer.h"
#include "quoted-printable.h"
#include "message-header-decode.h"

/* Split the contents of an RFC2047 encoded word, "charset?encoding?text?=",
   into its parts. data points just past the leading "=?" and *size_p is the
   number of bytes available from there.

   On success returns TRUE and sets:
     *charset     - the charset name, as returned by t_strndup()
     *encoding    - "Q" or "B" (the letter in data may be in either case)
     *text        - pointer into data where the still-encoded text begins
     *text_size_r - length of the encoded text in bytes
     *size_p      - number of bytes consumed, including the trailing "?="

   Returns FALSE if data doesn't hold a complete, well-formed encoded word.
   *size_p is left untouched in that case; *charset may already have been
   set. */
static int split_encoded(const unsigned char *data, size_t *size_p,
			 const char **charset, const char **encoding,
			 const unsigned char **text, size_t *text_size_r)
{
	size_t size, pos, textpos;

	size = *size_p;

	/* get charset */
	for (pos = 0; pos < size && data[pos] != '?'; pos++) ;
	/* check the bound before looking at data[pos]: the input is a sized
	   buffer, so data[size] must never be read */
	if (pos >= size)
		return FALSE;
	*charset = t_strndup(data, pos);

	/* get encoding: a single letter followed by '?', with at least one
	   more byte after that */
	pos++;
	if (pos+2 >= size || data[pos+1] != '?')
		return FALSE;

	if (data[pos] == 'Q' || data[pos] == 'q')
		*encoding = "Q";
	else if (data[pos] == 'B' || data[pos] == 'b')
		*encoding = "B";
	else
		return FALSE;

	/* get text */
	pos += 2;
	textpos = pos;
	while (pos < size && data[pos] != '?') pos++;
	/* both bytes of the "?=" terminator must be inside the buffer.
	   again test the bound first so nothing past the end is read. */
	if (pos+1 >= size || data[pos] != '?' || data[pos+1] != '=')
		return FALSE;

	*text = data + textpos;
	*text_size_r = pos - textpos;
	*size_p = pos+2;
	return TRUE;
}

/* Decode a single RFC2047 encoded word and pass the result to func.

   data points just past the leading "=?" and *size is the number of bytes
   available from there. On return *size tells the caller how many bytes to
   skip:
     - the length of the encoded word through its trailing "?=" if it could
       be split into charset/encoding/text (even if the text then turned
       out to be corrupted base64, in which case func isn't called)
     - 0 if it wasn't a well-formed encoded word at all

   Returns func's return value if func was called, TRUE otherwise. */
static int
message_header_decode_encoded(const unsigned char *data, size_t *size,
			      MessageHeaderDecodeFunc func, void *context)
{
	const unsigned char *text;
	const char *charset, *encoding;
	Buffer *decodebuf;
	size_t text_size;
	int ret;

	t_push();

	/* first split the string charset?encoding?text?= */
	if (!split_encoded(data, size, &charset, &encoding,
			   &text, &text_size)) {
		/* not an encoded word after all. report that nothing was
		   consumed so the caller goes on scanning the rest of the
		   header. leaving *size at its input value would make the
		   caller skip everything up to the end of the data. */
		*size = 0;
		t_pop();
		return TRUE;
	}

	/* the output buffer is sized to the length of the encoded text */
	decodebuf = buffer_create_static_hard(data_stack_pool, text_size);

	if (*encoding == 'Q')
		quoted_printable_decode(text, text_size, NULL, decodebuf);
	else {
		if (base64_decode(text, text_size, NULL, decodebuf) < 0) {
			/* corrupted encoding. *size already covers the whole
			   encoded word, so the caller skips over it. */
			t_pop();
			return TRUE;
		}
	}

	/* the decoded bytes are passed on as they are, together with the
	   charset name from the encoded word */
	ret = func(buffer_get_data(decodebuf, NULL),
		   buffer_get_used_size(decodebuf), charset, context);

	t_pop();
	return ret;
}

/* Walk through the header data looking for "=?" markers. Runs of bytes
   between markers are passed to func with charset NULL; whatever follows a
   marker is handed to message_header_decode_encoded(). Stops as soon as func
   (directly or through message_header_decode_encoded()) returns FALSE. */
void message_header_decode(const unsigned char *data, size_t size,
			   MessageHeaderDecodeFunc func, void *context)
{
	size_t i = 0, plain_start = 0, skip;

	while (i < size) {
		/* anything that isn't the two-byte "=?" marker is just part
		   of the current unencoded run */
		if (data[i] != '=' || i+1 >= size || data[i+1] != '?') {
			i++;
			continue;
		}

		/* flush the unencoded run collected so far, if there is one */
		if (i != plain_start &&
		    !func(data + plain_start, i - plain_start, NULL, context))
			return;

		/* step over "=?" and let the helper handle what follows.
		   skip goes in as the number of bytes left and comes back
		   as the number of bytes to advance. */
		i += 2;
		skip = size - i;
		if (!message_header_decode_encoded(data + i, &skip,
						   func, context))
			return;

		i += skip;
		plain_start = i;
	}

	/* send what's left after the last marker. the return value no longer
	   matters since there's nothing more to decode. */
	(void)func(data + plain_start, size - plain_start, NULL, context);
}

--- NEW FILE: message-header-decode.h ---
#ifndef __MESSAGE_HEADER_DECODE_H
#define __MESSAGE_HEADER_DECODE_H

/* Callback called by message_header_decode() for each block of header data.
   data and size describe the block: either a run of unencoded bytes taken
   directly from the header (charset is NULL), or the decoded contents of one
   RFC2047 encoded word (charset is the charset name given in that encoded
   word; the bytes are not converted from it). context is the pointer that
   was given to message_header_decode().
   Return FALSE if you wish to stop decoding. */
typedef int (*MessageHeaderDecodeFunc)(const unsigned char *data, size_t size,
				       const char *charset, void *context);

/* Decode RFC2047 encoded words found in the size bytes at data. Call
   specified function for each block, unencoded or decoded, in the order
   they appear in the data. Decoding stops early if func returns FALSE.
   context is passed to func unchanged. */
void message_header_decode(const unsigned char *data, size_t size,
			   MessageHeaderDecodeFunc func, void *context);

#endif

Index: Makefile.am
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/Makefile.am,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- Makefile.am	13 Nov 2002 11:08:18 -0000	1.5
+++ Makefile.am	16 Dec 2002 03:26:55 -0000	1.6
@@ -7,6 +7,7 @@
 libmail_a_SOURCES = \
 	message-body-search.c \
 	message-content-parser.c \
+	message-header-decode.c \
 	message-header-search.c \
 	message-parser.c \
 	message-part-serialize.c \
@@ -20,6 +21,7 @@
 noinst_HEADERS = \
 	message-body-search.h \
 	message-content-parser.h \
+	message-header-decode.h \
 	message-header-search.h \
 	message-parser.h \
 	message-part-serialize.h \

Index: message-header-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-header-search.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- message-header-search.c	8 Dec 2002 05:23:08 -0000	1.6
+++ message-header-search.c	16 Dec 2002 03:26:55 -0000	1.7
@@ -6,6 +6,7 @@
 #include "charset-utf8.h"
 #include "rfc822-tokenize.h"
 #include "quoted-printable.h"
+#include "message-header-decode.h"
 #include "message-header-search.h"
 
 #include <ctype.h>
@@ -27,6 +28,9 @@
 	unsigned int unknown_charset:1;
 };
 
+static void search_loop(const unsigned char *data, size_t size,
+			HeaderSearchContext *ctx);
+
 HeaderSearchContext *
 message_header_search_init(Pool pool, const char *key, const char *charset,
 			   int *unknown_charset)
@@ -79,12 +83,12 @@
 	p_free(pool, ctx);
 }
 
-static int match_data(const Buffer *buffer, const char *charset,
-		      HeaderSearchContext *ctx)
+static void search_with_charset(const unsigned char *data, size_t size,
+				const char *charset, HeaderSearchContext *ctx)
 {
+	Buffer *buf;
 	const char *utf8_data;
-	size_t size;
-	int ret;
+	size_t utf8_size;
 
 	if (ctx->unknown_charset) {
 		/* we don't know the source charset, so assume we want to
@@ -96,127 +100,30 @@
 		charset = ctx->key_charset;
 	}
 
-	utf8_data = charset_to_ucase_utf8_string(charset, NULL, buffer, &size);
+	buf = buffer_create_const_data(data_stack_pool, data, size);
+	utf8_data = charset_to_ucase_utf8_string(charset, NULL,
+						 buf, &utf8_size);
+
 	if (utf8_data == NULL) {
 		/* unknown character set, or invalid data */
-		return FALSE;
-	}
-
-	ctx->submatch = TRUE;
-	ret = message_header_search(utf8_data, size, ctx);
-	ctx->submatch = FALSE;
-
-	return ret;
-}
-
-static int split_encoded(Buffer *buffer, size_t *last_pos,
-			 const char **charset, const char **encoding)
-{
-	const char *p;
-	size_t size, pos, textpos;
-
-	p = buffer_get_data(buffer, &size);
-
-	/* get charset */
-	for (pos = 0; pos < size && p[pos] != '?'; pos++) ;
-	if (p[pos] != '?') return FALSE;
-	*charset = t_strndup(p, pos);
-
-	/* get encoding */
-	pos++;
-	if (pos+2 >= size || p[pos+1] != '?')
-		return FALSE;
-
-	if (p[pos] == 'Q' || p[pos] == 'q')
-		*encoding = "Q";
-	else if (p[pos] == 'B' || p[pos] == 'b')
-		*encoding = "B";
-	else
-		return FALSE;
-
-	/* get text */
-	pos += 2;
-	textpos = pos;
-	for (; pos < size && p[pos] != '?'; pos++) ;
-	if (p[pos] != '?' || pos+1 >= size || p[pos+1] != '=') return FALSE;
-
-	buffer_set_limit(buffer, pos);
-	buffer_set_start_pos(buffer, textpos + buffer_get_start_pos(buffer));
-	*last_pos = pos+1;
-
-	return TRUE;
-}
-
-static int match_encoded(Buffer *buffer, size_t *last_pos,
-			 HeaderSearchContext *ctx)
-{
-	const char *charset, *encoding, *text;
-	Buffer *decodebuf;
-	size_t textsize;
-
-	/* first split the string charset?encoding?text?= */
-	if (!split_encoded(buffer, last_pos, &charset, &encoding)) {
-		ctx->match_count = 0;
-		return FALSE;
-	}
-
-	/* buffer is now limited to only the text portion */
-	text = buffer_get_data(buffer, &textsize);
-	decodebuf = buffer_create_static_hard(data_stack_pool, textsize);
-
-	if (*encoding == 'Q')
-		quoted_printable_decode(text, textsize, NULL, decodebuf);
-	else {
-		if (base64_decode(text, textsize, NULL, decodebuf) < 0) {
-			/* corrupted encoding */
-			ctx->match_count = 0;
-			return FALSE;
-		}
+	} else {
+		ctx->submatch = TRUE;
+		search_loop(utf8_data, utf8_size, ctx);
+		ctx->submatch = FALSE;
 	}
-
-	return match_data(decodebuf, charset, ctx);
 }
 
-int message_header_search(const unsigned char *header_block, size_t size,
-			  HeaderSearchContext *ctx)
+static void search_loop(const unsigned char *data, size_t size,
+			HeaderSearchContext *ctx)
 {
-	Buffer *buf;
+	size_t pos;
 	ssize_t i;
-	size_t pos, subpos;
 	unsigned char chr;
-	int last_newline, ret;
-
-	if (ctx->found)
-		return TRUE;
-
-	t_push();
-	buf = buffer_create_const_data(data_stack_pool, header_block, size);
+	int last_newline;
 
 	last_newline = ctx->last_newline;
 	for (pos = 0; pos < size; pos++) {
-		chr = header_block[pos];
-
-		if (chr == '=' && pos+1 < size &&
-		    header_block[pos+1] == '?' && !ctx->submatch) {
-			/* encoded string. read it. */
-                        buffer_set_start_pos(buf, pos+2);
-
-			t_push();
-			ret = match_encoded(buf, &subpos, ctx);
-			t_pop();
-
-			if (ret) {
-				ctx->found = TRUE;
-				break;
-			}
-
-			buffer_set_start_pos(buf, 0);
-			buffer_set_limit(buf, (size_t)-1);
-
-			pos += subpos - 1;
-			last_newline = FALSE;
-			continue;
-		}
+		chr = data[pos];
 
 		if (!ctx->submatch) {
 			if ((chr & 0x80) == 0)
@@ -225,10 +132,8 @@
 				/* we have non-ascii in header and key contains
 				   non-ascii characters. treat the rest of the
 				   header as encoded with the key's charset */
-				t_push();
-				ctx->found = match_data(buf, ctx->key_charset,
-							ctx);
-				t_pop();
+				search_with_charset(data + pos, size - pos,
+						    ctx->key_charset, ctx);
 				break;
 			}
 		}
@@ -250,8 +155,7 @@
 				if (++ctx->matches[i] == ctx->key_len) {
 					/* full match */
 					ctx->found = TRUE;
-					t_pop();
-					return TRUE;
+					return;
 				}
 			} else {
 				/* non-match */
@@ -274,9 +178,32 @@
 			ctx->matches[ctx->match_count++] = 1;
 		}
 	}
-	t_pop();
 
 	ctx->last_newline = last_newline;
+}
+
+static int search_block(const unsigned char *data, size_t size,
+			const char *charset, void *context)
+{
+	HeaderSearchContext *ctx = context;
+
+	t_push();
+	if (charset != NULL) {
+		/* need to convert to UTF-8 */
+		search_with_charset(data, size, charset, ctx);
+	} else {
+		search_loop(data, size, ctx);
+	}
+
+	t_pop();
+	return !ctx->found;
+}
+
+int message_header_search(const unsigned char *header_block, size_t size,
+			  HeaderSearchContext *ctx)
+{
+	if (!ctx->found)
+		message_header_decode(header_block, size, search_block, ctx);
 	return ctx->found;
 }
 

Index: quoted-printable.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/quoted-printable.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- quoted-printable.c	8 Dec 2002 05:23:08 -0000	1.2
+++ quoted-printable.c	16 Dec 2002 03:26:55 -0000	1.3
@@ -18,10 +18,10 @@
 		if (src[src_pos] != '_' && src[src_pos] != '=')
 			continue;
 
-		buffer_append(dest, src, src_pos - next);
+		buffer_append(dest, src + next, src_pos - next);
 
 		if (src[src_pos] == '_') {
-			buffer_append(dest, " ", 1);
+			buffer_append_c(dest, ' ');
 			next = src_pos+1;
 		} else {
 			/* =<hex> */
@@ -40,6 +40,8 @@
 			}
 		}
 	}
+
+	buffer_append(dest, src + next, src_size - next);
 
 	if (src_pos_r != NULL)
 		*src_pos_r = src_pos;