dovecot-2.2: lib-mail: Added message_header_encode_data() to support encoding also NUL characters.

Mon May 5 12:40:29 UTC 2014

details:   http://hg.dovecot.org/dovecot-2.2/rev/cc622a9a2f3c
changeset: 17319:cc622a9a2f3c
user:      Timo Sirainen <tss at iki.fi>
date:      Mon May 05 15:39:58 2014 +0300
description:
lib-mail: Added message_header_encode_data() to support encoding also NUL characters.

diffstat:

 src/lib-mail/Makefile.am                  |   2 +-
 src/lib-mail/message-header-encode.c      |  32 +++++++++++++++++-----------
 src/lib-mail/message-header-encode.h      |   2 +
 src/lib-mail/test-message-header-decode.c |  34 +++++++++++++++++++++++++++++++
 src/lib-mail/test-message-header-encode.c |  16 ++++++++++++++
 5 files changed, 72 insertions(+), 14 deletions(-)

diffs (206 lines):

diff -r 841d31e6c647 -r cc622a9a2f3c src/lib-mail/Makefile.am

--- a/src/lib-mail/Makefile.am	Mon May 05 15:05:20 2014 +0300
+++ b/src/lib-mail/Makefile.am	Mon May 05 15:39:58 2014 +0300
@@ -134,7 +134,7 @@
 test_message_decoder_DEPENDENCIES = $(test_deps)
 
 test_message_header_decode_SOURCES = test-message-header-decode.c
-test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo $(test_libs)
+test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo message-header-encode.lo $(test_libs)
 test_message_header_decode_DEPENDENCIES = $(test_deps)
 
 test_message_header_encode_SOURCES = test-message-header-encode.c
diff -r 841d31e6c647 -r cc622a9a2f3c src/lib-mail/message-header-encode.c
--- a/src/lib-mail/message-header-encode.c	Mon May 05 15:05:20 2014 +0300
+++ b/src/lib-mail/message-header-encode.c	Mon May 05 15:39:58 2014 +0300
@@ -11,7 +11,8 @@
 #define IS_LWSP(c) \
 	((c) == ' ' || (c) == '\t' || (c) == '\n')
 
-static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
+static bool input_idx_need_encoding(const unsigned char *input,
+				    unsigned int i, unsigned int len)
 {
 	/* 8bit chars */
 	if ((input[i] & 0x80) != 0)
@@ -21,7 +22,7 @@
 		return TRUE;
 
 	/* <LWSP>=? */
-	if (input[i] == '=' && input[i+1] == '?' &&
+	if (input[i] == '=' && i+1 < len && input[i+1] == '?' &&
 	    (i == 0 || IS_LWSP(input[i-1])))
 		return TRUE;
 	return FALSE;
@@ -130,21 +131,26 @@
 	}
 }
 
-void message_header_encode(const char *_input, string_t *output)
+void message_header_encode(const char *input, string_t *output)
 {
-	const unsigned char *input = (const unsigned char *)_input;
+	message_header_encode_data((const void *)input, strlen(input), output);
+}
+
+void message_header_encode_data(const unsigned char *input, unsigned int len,
+				string_t *output)
+{
 	unsigned int i, first_idx, last_idx;
 	unsigned int enc_chars, enc_len, base64_len, q_len;
 	bool use_q;
 
 	/* find the first word that needs encoding */
-	for (i = 0; input[i] != '\0'; i++) {
-		if (input_idx_need_encoding(input, i))
+	for (i = 0; i < len; i++) {
+		if (input_idx_need_encoding(input, i, len))
 			break;
 	}
-	if (input[i] == '\0') {
+	if (i == len) {
 		/* no encoding necessary */
-		str_append(output, _input);
+		str_append_data(output, input, len);
 		return;
 	}
 	first_idx = i;
@@ -153,13 +159,13 @@
 
 	/* find the last word that needs encoding */
 	last_idx = ++i; enc_chars = 1;
-	for (; input[i] != '\0'; i++) {
-		if (input_idx_need_encoding(input, i)) {
+	for (; i < len; i++) {
+		if (input_idx_need_encoding(input, i, len)) {
 			last_idx = i + 1;
 			enc_chars++;
 		}
 	}
-	while (input[last_idx] != '\0' && !IS_LWSP(input[last_idx]))
+	while (last_idx < len && !IS_LWSP(input[last_idx]))
 		last_idx++;
 
 	/* figure out if we should use Q or B encoding. Prefer Q if it's not
@@ -170,10 +176,10 @@
 	use_q = q_len*2/3 <= base64_len;
 
 	/* and do it */
-	str_append_n(output, input, first_idx);
+	str_append_data(output, input, first_idx);
 	if (use_q)
 		message_header_encode_q(input + first_idx, enc_len, output);
 	else
 		message_header_encode_b(input + first_idx, enc_len, output);
-	str_append(output, _input + last_idx);
+	str_append_data(output, input + last_idx, len - last_idx);
 }
diff -r 841d31e6c647 -r cc622a9a2f3c src/lib-mail/message-header-encode.h
--- a/src/lib-mail/message-header-encode.h	Mon May 05 15:05:20 2014 +0300
+++ b/src/lib-mail/message-header-encode.h	Mon May 05 15:39:58 2014 +0300
@@ -3,6 +3,8 @@
 
 /* Encode UTF-8 input into output wherever necessary. */
 void message_header_encode(const char *input, string_t *output);
+void message_header_encode_data(const unsigned char *input, unsigned int len,
+				string_t *output);
 
 /* Encode the whole UTF-8 input using "Q" or "B" encoding into output.
    The output is split into multiple lines if necessary. The first line length
diff -r 841d31e6c647 -r cc622a9a2f3c src/lib-mail/test-message-header-decode.c
--- a/src/lib-mail/test-message-header-decode.c	Mon May 05 15:05:20 2014 +0300
+++ b/src/lib-mail/test-message-header-decode.c	Mon May 05 15:39:58 2014 +0300
@@ -4,9 +4,12 @@
 #include "buffer.h"
 #include "str.h"
 #include "charset-utf8.h"
+#include "message-header-encode.h"
 #include "message-header-decode.h"
 #include "test-common.h"
 
+#include <stdlib.h>
+
 bool charset_is_utf8(const char *charset ATTR_UNUSED) { return TRUE; }
 
 int charset_to_utf8_begin(const char *charset ATTR_UNUSED,
@@ -25,6 +28,7 @@
 static void test_message_header_decode(void)
 {
 	static const char *data[] = {
+		" \t=?utf-8?q?=c3=a4?=  =?utf-8?q?=c3=a4?=  b  \t\r\n ", "ää  b  \t\r\n ",
 		"a =?utf-8?q?=c3=a4?= b", "a ä b",
 		"a =?utf-8?q?=c3=a4?= b", "a ä b",
 		"a =?utf-8?q?=c3=a4?=\t\t\r\n =?utf-8?q?=c3=a4?= b", "a ää b",
@@ -47,10 +51,40 @@
 	test_end();
 }
 
+static void test_message_header_decode_encode_random(void)
+{
+	string_t *encoded, *decoded;
+	unsigned char buf[1024];
+	unsigned int i, j, buflen;
+
+	test_begin("message header encode & decode randomly");
+
+	buf[0] = 'x';
+	encoded = t_str_new(256);
+	decoded = t_str_new(256);
+	for (i = 0; i < 1000; i++) {
+		/* fill only with 7bit data so we don't have to worry about
+		   the data being valid UTF-8 */
+		for (j = 1; j < sizeof(buf); j++)
+			buf[j] = rand() % 128;
+		buflen = rand() % sizeof(buf);
+
+		str_truncate(encoded, 0);
+		str_truncate(decoded, 0);
+		message_header_encode_data(buf, buflen, encoded);
+		message_header_decode_utf8(encoded->data, encoded->used,
+					   decoded, NULL);
+		test_assert(decoded->used == buflen &&
+			    memcmp(decoded->data, buf, buflen) == 0);
+	}
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
 		test_message_header_decode,
+		test_message_header_decode_encode_random,
 		NULL
 	};
 	return test_run(test_functions);
diff -r 841d31e6c647 -r cc622a9a2f3c src/lib-mail/test-message-header-encode.c
--- a/src/lib-mail/test-message-header-encode.c	Mon May 05 15:05:20 2014 +0300
+++ b/src/lib-mail/test-message-header-encode.c	Mon May 05 15:39:58 2014 +0300
@@ -183,12 +183,28 @@
 	test_end();
 }
 
+static void test_message_header_encode_data(void)
+{
+	string_t *str = t_str_new(128);
+	static unsigned char nuls[10] = { 0, };
+
+	test_begin("message header encode data");
+	message_header_encode_data(nuls, 1, str);
+	test_assert(strcmp(str_c(str), "=?utf-8?q?=00?=") == 0);
+
+	str_truncate(str, 0);
+	message_header_encode_data(nuls, sizeof(nuls), str);
+	test_assert(strcmp(str_c(str), "=?utf-8?b?AAAAAAAAAAAAAA==?=") == 0);
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
 		test_message_header_encode_q,
 		test_message_header_encode_b,
 		test_message_header_encode,
+		test_message_header_encode_data,
 		NULL
 	};
 	return test_run(test_functions);