dovecot-2.0-sslstream: Added message header (RFC 2047) encoder.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Feb 13 02:55:47 EET 2010
details: http://hg.dovecot.org/dovecot-2.0-sslstream/rev/104edcb89a70
changeset: 10212:104edcb89a70
user: Timo Sirainen <tss at iki.fi>
date: Tue Oct 27 22:44:39 2009 -0400
description:
Added message header (RFC 2047) encoder.
diffstat:
4 files changed, 388 insertions(+)
src/lib-mail/Makefile.am | 7 +
src/lib-mail/message-header-encode.c | 173 +++++++++++++++++++++++++
src/lib-mail/message-header-encode.h | 15 ++
src/lib-mail/test-message-header-encode.c | 193 +++++++++++++++++++++++++++++
diffs (truncated from 431 to 300 lines):
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/Makefile.am
--- a/src/lib-mail/Makefile.am Tue Oct 27 17:35:16 2009 -0400
+++ b/src/lib-mail/Makefile.am Tue Oct 27 22:44:39 2009 -0400
@@ -13,6 +13,7 @@ libmail_la_SOURCES = \
message-date.c \
message-decoder.c \
message-header-decode.c \
+ message-header-encode.c \
message-header-parser.c \
message-id.c \
message-parser.c \
@@ -33,6 +34,7 @@ headers = \
message-date.h \
message-decoder.h \
message-header-decode.h \
+ message-header-encode.h \
message-header-parser.h \
message-id.h \
message-parser.h \
@@ -59,6 +61,7 @@ test_programs = \
test-message-date \
test-message-decoder \
test-message-header-decode \
+ test-message-header-encode \
test-message-header-parser \
test-message-id \
test-message-parser \
@@ -99,6 +102,10 @@ test_message_header_decode_LDADD = messa
test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo $(test_libs)
test_message_header_decode_DEPENDENCIES = message-header-decode.lo quoted-printable.lo $(test_libs)
+test_message_header_encode_SOURCES = test-message-header-encode.c
+test_message_header_encode_LDADD = message-header-encode.lo $(test_libs)
+test_message_header_encode_DEPENDENCIES = message-header-encode.lo $(test_libs)
+
test_message_header_parser_SOURCES = test-message-header-parser.c
test_message_header_parser_LDADD = message-header-parser.lo $(test_libs)
test_message_header_parser_DEPENDENCIES = message-header-parser.lo $(test_libs)
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/message-header-encode.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.c Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,173 @@
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "base64.h"
+#include "message-header-encode.h"
+
+/* Number of bytes an encoded-word adds around its payload: the
+   "=?utf-8?q?" prefix plus the "?=" suffix. */
+#define MIME_WRAPPER_LEN (sizeof("=?utf-8?q?" "?=") - 1)
+/* Line length limit the encoders use when deciding where to fold. */
+#define MIME_MAX_LINE_LEN 76
+
+/* Non-zero for the bytes treated as word separators in a header value:
+   space, tab and newline. */
+#define IS_LWSP(c) \
+	((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/* Returns TRUE if the byte at input[i] forces encoding: either it has the
+   high bit set, or it is the '=' of a "=?" pair located at the very start of
+   the input or right after whitespace. input[i+1] is read whenever input[i]
+   is '=', so a readable byte must follow it (callers in this file pass
+   NUL-terminated strings). */
+static bool input_idx_need_encoding(const unsigned char *input, unsigned int i)
+{
+	bool at_word_start;
+
+	if (input[i] >= 0x80)
+		return TRUE;
+	if (input[i] != '=' || input[i+1] != '?')
+		return FALSE;
+
+	at_word_start = i == 0 || IS_LWSP(input[i-1]);
+	return at_word_start;
+}
+
+/* Returns how many bytes str holds after its last '\n', or the whole length
+   of str when it contains no newline. */
+static unsigned int str_last_line_len(string_t *str)
+{
+	const unsigned char *data = str_data(str);
+	unsigned int total = str_len(str);
+	unsigned int line_start;
+
+	/* walk back from the end until the byte before us is a newline */
+	for (line_start = total; line_start > 0; line_start--) {
+		if (data[line_start-1] == '\n')
+			break;
+	}
+	return total - line_start;
+}
+
+/* Appends input[0..len-1] to output as one or more "Q" encoded-words
+   ("=?utf-8?q?...?="), separated by "\n\t" so that output lines stay within
+   MIME_MAX_LINE_LEN. Whatever output already holds after its last newline
+   counts against the first line; if too little room is left there, the first
+   encoded-word is started on a new folded line.
+
+   Space is written as '_'. The bytes '=', '?' and '_', control bytes (< 32)
+   and bytes with the high bit set are written as "=XX" with two uppercase
+   hex digits. Every other byte is copied as-is. A valid multi-byte UTF-8
+   character is never split between two encoded-words. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int i, line_len, line_len_left, back;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 3) {
+		/* not even a single "=XX" fits on the current line. the tab
+		   that starts the folded line counts as one byte. */
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+
+	str_append(output, "=?utf-8?q?");
+	line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+	for (i = 0; i < len; i++) {
+		if (line_len_left < 3) {
+			/* if we're not at the beginning of a character,
+			   go backwards until we are. a UTF-8 character has at
+			   most 3 continuation bytes, so never go back further
+			   than that: otherwise malformed input could make us
+			   restart from the same position forever. only step
+			   over bytes that have the high bit set, because
+			   those are the ones known to have been written as
+			   exactly 3 output bytes. */
+			back = 0;
+			while (back < 3 && i > 0 &&
+			       (input[i] & 0xc0) == 0x80 &&
+			       (input[i-1] & 0x80) != 0) {
+				str_truncate(output, str_len(output)-3);
+				i--;
+				back++;
+			}
+			str_append(output, "?=\n\t=?utf-8?q?");
+			line_len_left = MIME_MAX_LINE_LEN -
+				MIME_WRAPPER_LEN - 1;
+		}
+		switch (input[i]) {
+		case ' ':
+			str_append_c(output, '_');
+			break;
+		case '=':
+		case '?':
+		case '_':
+			/* "=XX" takes 3 bytes; one is subtracted below */
+			line_len_left -= 2;
+			str_printfa(output, "=%02X", input[i]);
+			break;
+		default:
+			if (input[i] < 32 || (input[i] & 0x80) != 0) {
+				line_len_left -= 2;
+				/* zero-padded: "%2X" would give "= 9" for
+				   bytes below 0x10 */
+				str_printfa(output, "=%02X", input[i]);
+			} else {
+				str_append_c(output, input[i]);
+			}
+			break;
+		}
+		line_len_left--;
+	}
+	str_append(output, "?=");
+}
+
+/* Appends input[0..len-1] to output as one or more "B" (base64)
+   encoded-words ("=?utf-8?b?...?="), separated by "\n\t" so that output lines
+   stay within MIME_MAX_LINE_LEN. Whatever output already holds after its last
+   newline counts against the first line; if no encoded-word fits there,
+   output continues on a new folded line. The input is cut only at UTF-8
+   character boundaries as long as it is valid UTF-8. */
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+			     string_t *output)
+{
+	unsigned int line_len, line_len_left, max, back;
+
+	line_len = str_last_line_len(output);
+	if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN) {
+		/* the tab that starts the folded line counts as one byte */
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+
+	for (;;) {
+		line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len;
+		/* start from the size base64.h reports for this much encoded
+		   data and shrink until the encoded form really fits */
+		max = MAX_BASE64_DECODED_SIZE(line_len_left);
+		do {
+			max--;
+			if (max >= len) {
+				/* everything that is left fits. input[len]
+				   is outside the input, so don't look at it */
+				max = len;
+			} else {
+				/* all of it doesn't fit. find a character
+				   where we can split it from. a UTF-8
+				   character has at most 3 continuation bytes,
+				   so don't go back further than that:
+				   otherwise a long run of continuation bytes
+				   in malformed input would bring max down to
+				   0 on every line and we would never finish */
+				back = 0;
+				while (back < 3 && max > 0 &&
+				       (input[max] & 0xc0) == 0x80) {
+					max--;
+					back++;
+				}
+			}
+		} while (MAX_BASE64_ENCODED_SIZE(max) > line_len_left &&
+			 max > 0);
+
+		if (max > 0) {
+			str_append(output, "=?utf-8?b?");
+			base64_encode(input, max, output);
+			str_append(output, "?=");
+		}
+
+		input += max;
+		len -= max;
+
+		if (len == 0)
+			break;
+
+		str_append(output, "\n\t");
+		line_len = 1;
+	}
+}
+
+/* Appends the NUL-terminated _input to output. If no byte needs encoding the
+   text is appended unchanged. Otherwise the span from the start of the first
+   word needing encoding to the end of the last such word is written with
+   message_header_encode_q() or message_header_encode_b(), and the text
+   before and after that span is appended as-is. */
+void message_header_encode(const char *_input, string_t *output)
+{
+	const unsigned char *input = (const unsigned char *)_input;
+	unsigned int pos, span_start, span_end, span_len;
+	unsigned int trigger_count, b_size, q_size;
+
+	/* locate the first byte that can't be sent as-is */
+	pos = 0;
+	while (input[pos] != '\0' && !input_idx_need_encoding(input, pos))
+		pos++;
+	if (input[pos] == '\0') {
+		/* nothing to encode */
+		str_append(output, _input);
+		return;
+	}
+
+	/* the encoded span begins where the word containing it begins */
+	span_start = pos;
+	while (span_start > 0 && !IS_LWSP(input[span_start-1]))
+		span_start--;
+
+	/* remember the last byte that needs encoding and count how many
+	   such bytes there are in total */
+	trigger_count = 1;
+	span_end = pos + 1;
+	for (pos++; input[pos] != '\0'; pos++) {
+		if (!input_idx_need_encoding(input, pos))
+			continue;
+		span_end = pos + 1;
+		trigger_count++;
+	}
+	/* the encoded span ends where the word containing that byte ends */
+	while (input[span_end] != '\0' && !IS_LWSP(input[span_end]))
+		span_end++;
+
+	/* estimate both encodings' sizes. Q is chosen unless it is too much
+	   larger than B. */
+	span_len = span_end - span_start;
+	b_size = MAX_BASE64_ENCODED_SIZE(span_len);
+	q_size = span_len + trigger_count*3;
+
+	str_append_n(output, input, span_start);
+	if (q_size*2/3 <= b_size)
+		message_header_encode_q(input + span_start, span_len, output);
+	else
+		message_header_encode_b(input + span_start, span_len, output);
+	str_append(output, _input + span_end);
+}
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/message-header-encode.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.h Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,15 @@
+#ifndef MESSAGE_HEADER_ENCODE_H
+#define MESSAGE_HEADER_ENCODE_H
+
+/* Encode UTF-8 input into output wherever necessary. input is a
+   NUL-terminated string and the result is appended to output. If nothing in
+   input needs encoding, it is appended unchanged. Otherwise everything from
+   the first word that needs encoding through the last such word is written
+   as "Q" or "B" encoded-words ("Q" is preferred unless it would be too much
+   larger), and the text before and after that span is appended as-is. */
+void message_header_encode(const char *input, string_t *output);
+
+/* Encode the whole UTF-8 input (len bytes) using "Q" or "B" encoding into
+   output. The result is appended to output as "=?utf-8?q?...?=" or
+   "=?utf-8?b?...?=" encoded-words.
+   The output is split into multiple lines if necessary; each continuation
+   line starts with a tab. The first line length is looked up from the output
+   string, i.e. whatever output already contains after its last newline
+   counts against the first line. */
+void message_header_encode_q(const unsigned char *input, unsigned int len,
+ string_t *output);
+void message_header_encode_b(const unsigned char *input, unsigned int len,
+ string_t *output);
+
+#endif
diff -r 66eaf7b1e36b -r 104edcb89a70 src/lib-mail/test-message-header-encode.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/test-message-header-encode.c Tue Oct 27 22:44:39 2009 -0400
@@ -0,0 +1,193 @@
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "base64.h"
+#include "buffer.h"
+#include "str.h"
+#include "message-header-encode.h"
+#include "test-common.h"
+
+/* Checks that str, starting at offset i, consists solely of "Q"
+   encoded-words separated by "\n\t": optionally one leading "\n\t", then
+   encoded-words whose payload is a run of "=C3=A4" sequences (the first
+   payload must begin with 'a' when starts_with_a is TRUE). Fails if any
+   encoded-word ends more than 76 bytes after the start of its line. Returns
+   TRUE only if exactly 40 "=C3=A4" sequences were seen in total. */
+static bool verify_q(const char *str, unsigned int i, bool starts_with_a)
+{
+	static const char word_begin[] = "=?utf-8?q?";
+	static const char word_end[] = "?=";
+	static const char fold[] = "\n\t";
+	static const char a_umlaut[] = "=C3=A4";
+	unsigned int line_start = i;
+	unsigned int umlauts = 0;
+
+	if (strncmp(str+i, fold, sizeof(fold)-1) == 0) {
+		i += sizeof(fold)-1;
+		/* the tab belongs to the new line */
+		line_start = i - 1;
+	}
+
+	for (;;) {
+		if (strncmp(str+i, word_begin, sizeof(word_begin)-1) != 0)
+			return FALSE;
+		i += sizeof(word_begin)-1;
+
+		if (starts_with_a) {
+			/* only the very first payload carries the 'a' */
+			if (str[i] != 'a')
+				return FALSE;
+			starts_with_a = FALSE;
+			i++;
+		}
+		for (; strncmp(str+i, word_end, sizeof(word_end)-1) != 0;
+		     i += sizeof(a_umlaut)-1) {
+			if (strncmp(str+i, a_umlaut, sizeof(a_umlaut)-1) != 0)
+				return FALSE;
+			umlauts++;
+		}
+		i += sizeof(word_end)-1;
+		if (i - line_start > 76)
+			return FALSE;
+
+		if (str[i] == '\0')
+			break;
+		if (strncmp(str+i, fold, sizeof(fold)-1) != 0)
+			return FALSE;
+		i += sizeof(fold)-1;
+		line_start = i - 1;
+	}
+	return umlauts == 40;
+}
+
+static void test_message_header_encode_q(void)
+{
+ string_t *input = t_str_new(100);
+ string_t *str = t_str_new(512);
+ unsigned int i, j, skip;
+
+ test_begin("message header encode q");
+
+ str_append_c(input, 'a');
+ for (i = 0; i < 40; i++)
+ str_append(input, "ä");
+ for (i = 0; i < 80; i++) {
+ for (skip = 0; skip < 2; skip++) {
More information about the dovecot-cvs
mailing list