dovecot-2.0-sslstream: Added message header (RFC 2047) encoder.

Sat Feb 13 02:55:47 EET 2010

details:   http://hg.dovecot.org/dovecot-2.0-sslstream/rev/104edcb89a70
changeset: 10212:104edcb89a70
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Oct 27 22:44:39 2009 -0400
description:
Added message header (RFC 2047) encoder.

diffstat:

4 files changed, 388 insertions(+)
src/lib-mail/Makefile.am                  |    7 +
src/lib-mail/message-header-encode.c      |  173 +++++++++++++++++++++++++
src/lib-mail/message-header-encode.h      |   15 ++
src/lib-mail/test-message-header-encode.c |  193 +++++++++++++++++++++++++++++

diffs (truncated from 431 to 300 lines):

diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/Makefile.am

--- a/src/lib-mail/Makefile.am	Tue Oct 27 17:35:16 2009 -0400
+++ b/src/lib-mail/Makefile.am	Tue Oct 27 22:44:39 2009 -0400
@@ -13,6 +13,7 @@ libmail_la_SOURCES = \
 	message-date.c \
 	message-decoder.c \
 	message-header-decode.c \
+	message-header-encode.c \
 	message-header-parser.c \
 	message-id.c \
 	message-parser.c \
@@ -33,6 +34,7 @@ headers = \
 	message-date.h \
 	message-decoder.h \
 	message-header-decode.h \
+	message-header-encode.h \
 	message-header-parser.h \
 	message-id.h \
 	message-parser.h \
@@ -59,6 +61,7 @@ test_programs = \
 	test-message-date \
 	test-message-decoder \
 	test-message-header-decode \
+	test-message-header-encode \
 	test-message-header-parser \
 	test-message-id \
 	test-message-parser \
@@ -99,6 +102,10 @@ test_message_header_decode_LDADD = messa
 test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo $(test_libs)
 test_message_header_decode_DEPENDENCIES = message-header-decode.lo quoted-printable.lo $(test_libs)
 
+test_message_header_encode_SOURCES = test-message-header-encode.c
+test_message_header_encode_LDADD = message-header-encode.lo $(test_libs)
+test_message_header_encode_DEPENDENCIES = message-header-encode.lo $(test_libs)
+
 test_message_header_parser_SOURCES = test-message-header-parser.c
 test_message_header_parser_LDADD = message-header-parser.lo $(test_libs)
 test_message_header_parser_DEPENDENCIES = message-header-parser.lo $(test_libs)
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/message-header-encode.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.c	Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,173 @@
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "base64.h"
+#include "message-header-encode.h"
+
+#define MIME_WRAPPER_LEN (strlen("=?utf-8?q?""?=")) /* prefix + suffix */
+#define MIME_MAX_LINE_LEN 76 /* line length the encoders fold at */
+/* Non-zero for the whitespace that separates words: space, tab, newline. */
+#define IS_LWSP(c) \
+	((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/* TRUE if input[i] has its high bit set, or if it starts a "=?" that is at
+   the beginning of input or preceded by whitespace. */
+static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
+{
+	if (input[i] >= 0x80)
+		return TRUE;
+	if (input[i] != '=' || input[i+1] != '?')
+		return FALSE;
+	return i == 0 || IS_LWSP(input[i-1]);
+}
+
+/* Number of bytes in str after its last '\n' (all of them if there's none). */
+static unsigned int str_last_line_len(string_t *str)
+{
+	const unsigned char *bytes = str_data(str);
+	unsigned int total = str_len(str), n = 0;
+
+	for (; n < total && bytes[total-n-1] != '\n'; n++) ;
+	return n;
+}
+
+/* Append input[0..len-1], which should be valid UTF-8, to output as "Q"
+   encoded-words. What output's last line already holds is counted in, and
+   a new word is begun after a "\n\t" fold once the line fills up. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int i, line_len, line_len_left;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 3) {
+		/* too little room left on this line, begin on a new one */
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+
+	str_append(output, "=?utf-8?q?");
+	line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+	for (i = 0; i < len; i++) {
+		if (line_len_left < 3) {
+			/* if we're not at the beginning of a character,
+			   go backwards until we are. each byte undone was
+			   written as a 3 char "=XX". stopping at i=0 keeps
+			   the index from wrapping on broken input. */
+			while (i > 0 && (input[i] & 0xc0) == 0x80) {
+				str_truncate(output, str_len(output)-3);
+				i--;
+			}
+			str_append(output, "?=\n\t=?utf-8?q?");
+			line_len_left = MIME_MAX_LINE_LEN -
+				MIME_WRAPPER_LEN - 1;
+		}
+		if (input[i] == ' ') {
+			str_append_c(output, '_');
+		} else if (input[i] < 32 || (input[i] & 0x80) != 0 ||
+			   input[i] == '=' || input[i] == '?' ||
+			   input[i] == '_') {
+			/* "=XX" is 3 chars, one of which is counted below.
+			   %02X so that bytes below 0x10 get both digits. */
+			line_len_left -= 2;
+			str_printfa(output, "=%02X", input[i]);
+		} else {
+			str_append_c(output, input[i]);
+		}
+		line_len_left--;
+	}
+	str_append(output, "?=");
+}
+
+/* Append input[0..len-1] to output as "B" (base64) encoded-words. One word is
+   written per line, split only where a UTF-8 character begins. The room left
+   on output's last line is used first, the rest goes after "\n\t" folds. */
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int line_len, line_len_left, max;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN) {
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+
+	for (;;) {
+		line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+		max = MAX_BASE64_DECODED_SIZE(line_len_left);
+		do {
+			max--;
+			if (max >= len) {
+				/* everything left; never peek at input[len] */
+				max = len;
+			} else {
+				/* all of it doesn't fit. find a character
+				   where we can split it from. */
+				while (max > 0 && (input[max] & 0xc0) == 0x80)
+					max--;
+			}
+		} while (MAX_BASE64_ENCODED_SIZE(max) > line_len_left &&
+			 max > 0);
+		if (max > 0) {
+			str_append(output, "=?utf-8?b?");
+			base64_encode(input, max, output);
+			str_append(output, "?=");
+		}
+		input += max;
+		len -= max;
+		if (len == 0)
+			break;
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+}
+
+/* Append _input to output. The span from the beginning of the first word that
+   needs encoding to the end of the last one is written as encoded-words, and
+   whatever precedes or follows that span is copied as is. */
+void message_header_encode(const char *_input, string_t *output)
+{
+	const unsigned char *input = (const unsigned char *)_input;
+	unsigned int pos, span_start, span_end, hits, span_len, q_est, b_est;
+
+	/* look for the first byte that can't be sent as is */
+	pos = 0;
+	while (input[pos] != '\0' && !input_idx_need_encoding(input, pos))
+		pos++;
+	if (input[pos] == '\0') {
+		/* there is none, so nothing gets encoded */
+		str_append(output, _input);
+		return;
+	}
+	/* the span starts where that byte's word starts */
+	for (span_start = pos; span_start > 0; span_start--) {
+		if (IS_LWSP(input[span_start-1]))
+			break;
+	}
+
+	/* note where the last such byte is and count them on the way */
+	hits = 1;
+	span_end = pos + 1;
+	for (pos++; input[pos] != '\0'; pos++) {
+		if (!input_idx_need_encoding(input, pos))
+			continue;
+		span_end = pos + 1;
+		hits++;
+	}
+	/* the span ends where that byte's word ends */
+	while (input[span_end] != '\0' && !IS_LWSP(input[span_end]))
+		span_end++;
+	span_len = span_end - span_start;
+	/* Q is used unless two thirds of its estimated size is still more
+	   than what base64 would take */
+	q_est = span_len + hits*3;
+	b_est = MAX_BASE64_ENCODED_SIZE(span_len);
+	str_append_n(output, input, span_start);
+	if (q_est*2/3 <= b_est)
+		message_header_encode_q(input + span_start, span_len, output);
+	else
+		message_header_encode_b(input + span_start, span_len, output);
+	str_append(output, _input + span_end);
+}
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/message-header-encode.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.h	Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,15 @@
+#ifndef MESSAGE_HEADER_ENCODE_H
+#define MESSAGE_HEADER_ENCODE_H
+
+/* Append NUL-terminated UTF-8 input to output, encoded wherever necessary. */
+void message_header_encode(const char *input, string_t *output);
+
+/* Append len bytes of UTF-8 input to output, all of it "Q" or "B" encoded.
+   The output is split into multiple lines if necessary. The first line's
+   length is looked up from what the output string already contains. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+			     string_t *output);
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+			     string_t *output);
+
+#endif
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/test-message-header-encode.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/test-message-header-encode.c	Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,193 @@
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "base64.h"
+#include "buffer.h"
+#include "str.h"
+#include "message-header-encode.h"
+#include "test-common.h"
+
+/* TRUE if str+i consists of "=?utf-8?q?...?=" words separated by "\n\t", the
+   words together hold exactly 40 "=C3=A4" (after one 'a' if starts_with_a),
+   and no line exceeds 76 chars, the first one being measured from i. */
+static bool verify_q(const char *str, unsigned int i, bool starts_with_a)
+{
+	unsigned int line_start = i, seen = 0;
+
+	if (strncmp(str+i, "\n\t", 2) == 0) {
+		/* folded right away: the line begins at the tab */
+		i += 2;
+		line_start = i - 1;
+	}
+
+	for (;; i += 2, line_start = i - 1) {
+		if (strncmp(str+i, "=?utf-8?q?", 10) != 0)
+			return FALSE;
+		i += 10;
+		if (starts_with_a) {
+			/* only the very first word carries the 'a' */
+			if (str[i] != 'a')
+				return FALSE;
+			starts_with_a = FALSE;
+			i++;
+		}
+		for (; strncmp(str+i, "?=", 2) != 0; i += 6, seen++) {
+			if (strncmp(str+i, "=C3=A4", 6) != 0)
+				return FALSE;
+		}
+		i += 2;
+		/* the word, wrapper included, has to fit on its line */
+		if (i - line_start > 76)
+			return FALSE;
+		if (str[i] == '\0')
+			break;
+		if (strncmp(str+i, "\n\t", 2) != 0)
+			return FALSE;
+	}
+	return seen == 40;
+}
+
+static void test_message_header_encode_q(void)
+{
+	string_t *input = t_str_new(100);
+	string_t *str = t_str_new(512);
+	unsigned int i, j, skip;
+
+	test_begin("message header encode q");
+
+	str_append_c(input, 'a');
+	for (i = 0; i < 40; i++)
+		str_append(input, "Ã¤");
+	for (i = 0; i < 80; i++) {
+		for (skip = 0; skip < 2; skip++) {