[dovecot-cvs] dovecot/src/lib-mail message-header-decode.c,NONE,1.1 message-header-decode.h,NONE,1.1 Makefile.am,1.5,1.6 message-header-search.c,1.6,1.7 quoted-printable.c,1.2,1.3
cras at procontrol.fi
cras at procontrol.fi
Mon Dec 16 05:26:57 EET 2002
Update of /home/cvs/dovecot/src/lib-mail
In directory danu:/tmp/cvs-serv19656/lib-mail
Modified Files:
Makefile.am message-header-search.c quoted-printable.c
Added Files:
message-header-decode.c message-header-decode.h
Log Message:
Quoted-Printable decoding was buggy. Extracted RFC 2047 header decoding into
a separate function.
--- NEW FILE: message-header-decode.c ---
/* Copyright (C) 2002 Timo Sirainen */
#include "lib.h"
#include "base64.h"
#include "buffer.h"
#include "quoted-printable.h"
#include "message-header-decode.h"
/* Split the body of an RFC 2047 encoded-word, i.e. the bytes following the
   leading "=?", into its parts: charset?encoding?text?=

   data        - points just past the "=?" marker
   size_p      - in: number of bytes readable from data
                 out (only when TRUE is returned): number of bytes consumed,
                 including the terminating "?="
   charset     - set to a copy of the charset name made with t_strndup()
   encoding    - set to "Q" or "B"; the letter in data is accepted in either
                 case
   text        - set to point at the encoded text inside data
   text_size_r - set to the length of the encoded text

   Returns TRUE if a complete encoded-word was found, FALSE otherwise. When
   FALSE is returned *size_p, *text and *text_size_r are left untouched, but
   *charset and *encoding may already have been written. Never reads at or
   beyond data[*size_p]. */
static int split_encoded(const unsigned char *data, size_t *size_p,
			 const char **charset, const char **encoding,
			 const unsigned char **text, size_t *text_size_r)
{
	size_t size, pos, textpos;

	size = *size_p;

	/* get charset */
	for (pos = 0; pos < size && data[pos] != '?'; pos++) ;
	if (pos == size) {
		/* no '?' found. data[pos] is past the end of the input, so
		   it must not be looked at. */
		return FALSE;
	}
	*charset = t_strndup(data, pos);

	/* get encoding */
	pos++;
	if (pos+2 >= size || data[pos+1] != '?')
		return FALSE;

	if (data[pos] == 'Q' || data[pos] == 'q')
		*encoding = "Q";
	else if (data[pos] == 'B' || data[pos] == 'b')
		*encoding = "B";
	else
		return FALSE;

	/* get text */
	pos += 2;
	textpos = pos;
	while (pos < size && data[pos] != '?') pos++;

	/* the bounds check comes first: if the loop ran off the end there is
	   no data[pos] to read. Otherwise data[pos] is known to be '?' and
	   only the following '=' needs to be verified. */
	if (pos+1 >= size || data[pos+1] != '=')
		return FALSE;

	*text = data + textpos;
	*text_size_r = pos - textpos;
	*size_p = pos+2;
	return TRUE;
}
/* Decode one encoded-word body (the bytes following "=?") and hand the
   decoded bytes to func together with the charset name.

   data    - points just past the "=?" marker
   size    - in: number of bytes readable from data
             out: number of bytes consumed, or 0 if data doesn't contain a
             well-formed charset?encoding?text?= sequence
   func    - callback receiving the decoded bytes
   context - passed through to func unchanged

   Returns FALSE only when func returned FALSE (stop decoding), otherwise
   TRUE. func is not called if the encoded-word is malformed or its base64
   payload is corrupted. */
static int
message_header_decode_encoded(const unsigned char *data, size_t *size,
			      MessageHeaderDecodeFunc func, void *context)
{
	const unsigned char *text;
	const char *charset, *encoding;
	Buffer *decodebuf;
	size_t text_size;
	int ret;

	t_push();

	/* first split the string charset?encoding?text?= */
	if (!split_encoded(data, size, &charset, &encoding,
			   &text, &text_size)) {
		/* not an encoded-word after all. Report that nothing was
		   consumed; leaving *size at the full remaining length would
		   make the caller skip everything up to the end of the
		   header. */
		*size = 0;
		t_pop();
		return TRUE;
	}

	decodebuf = buffer_create_static_hard(data_stack_pool, text_size);

	if (*encoding == 'Q')
		quoted_printable_decode(text, text_size, NULL, decodebuf);
	else {
		if (base64_decode(text, text_size, NULL, decodebuf) < 0) {
			/* corrupted encoding. *size was already set by
			   split_encoded(), so the broken word is skipped. */
			t_pop();
			return TRUE;
		}
	}

	ret = func(buffer_get_data(decodebuf, NULL),
		   buffer_get_used_size(decodebuf), charset, context);

	t_pop();
	return ret;
}

/* Walk through header data, calling func for every run of unencoded bytes
   (charset == NULL) and for every decoded RFC 2047 encoded-word (charset set
   to the name given in the encoded-word). Decoding stops as soon as func
   returns FALSE.

   A "=?" that doesn't start a well-formed encoded-word is passed through as
   unencoded data. After the loop func is called once more with whatever
   unencoded data is left, which may be zero bytes. */
void message_header_decode(const unsigned char *data, size_t size,
			   MessageHeaderDecodeFunc func, void *context)
{
	size_t pos, start_pos, subsize;

	start_pos = pos = 0;
	while (pos < size) {
		if (data[pos] != '=' || pos+1 >= size || data[pos+1] != '?') {
			pos++;
			continue;
		}

		/* encoded string beginning */
		if (pos != start_pos) {
			/* send the unencoded data so far */
			if (!func(data + start_pos, pos - start_pos,
				  NULL, context))
				return;
			start_pos = pos;
		}

		pos += 2;
		subsize = size - pos;
		if (!message_header_decode_encoded(data + pos, &subsize,
						   func, context))
			return;

		if (subsize == 0) {
			/* malformed encoded-word. start_pos still points at
			   the "=?", so it gets sent as unencoded data later.
			   Keep scanning right after the marker. */
			continue;
		}

		pos += subsize;
		start_pos = pos;
	}

	(void)func(data + start_pos, size - start_pos, NULL, context);
}
--- NEW FILE: message-header-decode.h ---
#ifndef __MESSAGE_HEADER_DECODE_H
#define __MESSAGE_HEADER_DECODE_H
/* Callback for message_header_decode(). Called once per block of header
   data:

   data, size - the bytes of this block. For an RFC2047 encoded-word these
                are the bytes after Q/base64 decoding; they are still in the
                given charset, no character set conversion is done.
                NOTE(review): decoded blocks come from a temporary buffer, so
                treat data as valid only for the duration of the call.
   charset    - charset name taken from the encoded-word, or NULL when the
                block is not RFC2047-encoded.
   context    - the pointer that was given to message_header_decode().

   Return FALSE if you wish to stop decoding. */
typedef int (*MessageHeaderDecodeFunc)(const unsigned char *data, size_t size,
const char *charset, void *context);
/* Decode RFC2047 encoded words found in data[0..size-1]. Call specified
   function for each decoded block and for each run of unencoded data between
   them, in the order they appear. context is handed to func as-is. */
void message_header_decode(const unsigned char *data, size_t size,
MessageHeaderDecodeFunc func, void *context);
#endif
Index: Makefile.am
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/Makefile.am,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- Makefile.am 13 Nov 2002 11:08:18 -0000 1.5
+++ Makefile.am 16 Dec 2002 03:26:55 -0000 1.6
@@ -7,6 +7,7 @@
libmail_a_SOURCES = \
message-body-search.c \
message-content-parser.c \
+ message-header-decode.c \
message-header-search.c \
message-parser.c \
message-part-serialize.c \
@@ -20,6 +21,7 @@
noinst_HEADERS = \
message-body-search.h \
message-content-parser.h \
+ message-header-decode.h \
message-header-search.h \
message-parser.h \
message-part-serialize.h \
Index: message-header-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-header-search.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- message-header-search.c 8 Dec 2002 05:23:08 -0000 1.6
+++ message-header-search.c 16 Dec 2002 03:26:55 -0000 1.7
@@ -6,6 +6,7 @@
#include "charset-utf8.h"
#include "rfc822-tokenize.h"
#include "quoted-printable.h"
+#include "message-header-decode.h"
#include "message-header-search.h"
#include <ctype.h>
@@ -27,6 +28,9 @@
unsigned int unknown_charset:1;
};
+static void search_loop(const unsigned char *data, size_t size,
+ HeaderSearchContext *ctx);
+
HeaderSearchContext *
message_header_search_init(Pool pool, const char *key, const char *charset,
int *unknown_charset)
@@ -79,12 +83,12 @@
p_free(pool, ctx);
}
-static int match_data(const Buffer *buffer, const char *charset,
- HeaderSearchContext *ctx)
+static void search_with_charset(const unsigned char *data, size_t size,
+ const char *charset, HeaderSearchContext *ctx)
{
+ Buffer *buf;
const char *utf8_data;
- size_t size;
- int ret;
+ size_t utf8_size;
if (ctx->unknown_charset) {
/* we don't know the source charset, so assume we want to
@@ -96,127 +100,30 @@
charset = ctx->key_charset;
}
- utf8_data = charset_to_ucase_utf8_string(charset, NULL, buffer, &size);
+ buf = buffer_create_const_data(data_stack_pool, data, size);
+ utf8_data = charset_to_ucase_utf8_string(charset, NULL,
+ buf, &utf8_size);
+
if (utf8_data == NULL) {
/* unknown character set, or invalid data */
- return FALSE;
- }
-
- ctx->submatch = TRUE;
- ret = message_header_search(utf8_data, size, ctx);
- ctx->submatch = FALSE;
-
- return ret;
-}
-
-static int split_encoded(Buffer *buffer, size_t *last_pos,
- const char **charset, const char **encoding)
-{
- const char *p;
- size_t size, pos, textpos;
-
- p = buffer_get_data(buffer, &size);
-
- /* get charset */
- for (pos = 0; pos < size && p[pos] != '?'; pos++) ;
- if (p[pos] != '?') return FALSE;
- *charset = t_strndup(p, pos);
-
- /* get encoding */
- pos++;
- if (pos+2 >= size || p[pos+1] != '?')
- return FALSE;
-
- if (p[pos] == 'Q' || p[pos] == 'q')
- *encoding = "Q";
- else if (p[pos] == 'B' || p[pos] == 'b')
- *encoding = "B";
- else
- return FALSE;
-
- /* get text */
- pos += 2;
- textpos = pos;
- for (; pos < size && p[pos] != '?'; pos++) ;
- if (p[pos] != '?' || pos+1 >= size || p[pos+1] != '=') return FALSE;
-
- buffer_set_limit(buffer, pos);
- buffer_set_start_pos(buffer, textpos + buffer_get_start_pos(buffer));
- *last_pos = pos+1;
-
- return TRUE;
-}
-
-static int match_encoded(Buffer *buffer, size_t *last_pos,
- HeaderSearchContext *ctx)
-{
- const char *charset, *encoding, *text;
- Buffer *decodebuf;
- size_t textsize;
-
- /* first split the string charset?encoding?text?= */
- if (!split_encoded(buffer, last_pos, &charset, &encoding)) {
- ctx->match_count = 0;
- return FALSE;
- }
-
- /* buffer is now limited to only the text portion */
- text = buffer_get_data(buffer, &textsize);
- decodebuf = buffer_create_static_hard(data_stack_pool, textsize);
-
- if (*encoding == 'Q')
- quoted_printable_decode(text, textsize, NULL, decodebuf);
- else {
- if (base64_decode(text, textsize, NULL, decodebuf) < 0) {
- /* corrupted encoding */
- ctx->match_count = 0;
- return FALSE;
- }
+ } else {
+ ctx->submatch = TRUE;
+ search_loop(utf8_data, utf8_size, ctx);
+ ctx->submatch = FALSE;
}
-
- return match_data(decodebuf, charset, ctx);
}
-int message_header_search(const unsigned char *header_block, size_t size,
- HeaderSearchContext *ctx)
+static void search_loop(const unsigned char *data, size_t size,
+ HeaderSearchContext *ctx)
{
- Buffer *buf;
+ size_t pos;
ssize_t i;
- size_t pos, subpos;
unsigned char chr;
- int last_newline, ret;
-
- if (ctx->found)
- return TRUE;
-
- t_push();
- buf = buffer_create_const_data(data_stack_pool, header_block, size);
+ int last_newline;
last_newline = ctx->last_newline;
for (pos = 0; pos < size; pos++) {
- chr = header_block[pos];
-
- if (chr == '=' && pos+1 < size &&
- header_block[pos+1] == '?' && !ctx->submatch) {
- /* encoded string. read it. */
- buffer_set_start_pos(buf, pos+2);
-
- t_push();
- ret = match_encoded(buf, &subpos, ctx);
- t_pop();
-
- if (ret) {
- ctx->found = TRUE;
- break;
- }
-
- buffer_set_start_pos(buf, 0);
- buffer_set_limit(buf, (size_t)-1);
-
- pos += subpos - 1;
- last_newline = FALSE;
- continue;
- }
+ chr = data[pos];
if (!ctx->submatch) {
if ((chr & 0x80) == 0)
@@ -225,10 +132,8 @@
/* we have non-ascii in header and key contains
non-ascii characters. treat the rest of the
header as encoded with the key's charset */
- t_push();
- ctx->found = match_data(buf, ctx->key_charset,
- ctx);
- t_pop();
+ search_with_charset(data + pos, size - pos,
+ ctx->key_charset, ctx);
break;
}
}
@@ -250,8 +155,7 @@
if (++ctx->matches[i] == ctx->key_len) {
/* full match */
ctx->found = TRUE;
- t_pop();
- return TRUE;
+ return;
}
} else {
/* non-match */
@@ -274,9 +178,32 @@
ctx->matches[ctx->match_count++] = 1;
}
}
- t_pop();
ctx->last_newline = last_newline;
+}
+
+static int search_block(const unsigned char *data, size_t size,
+ const char *charset, void *context)
+{
+ HeaderSearchContext *ctx = context;
+
+ t_push();
+ if (charset != NULL) {
+ /* need to convert to UTF-8 */
+ search_with_charset(data, size, charset, ctx);
+ } else {
+ search_loop(data, size, ctx);
+ }
+
+ t_pop();
+ return !ctx->found;
+}
+
+int message_header_search(const unsigned char *header_block, size_t size,
+ HeaderSearchContext *ctx)
+{
+ if (!ctx->found)
+ message_header_decode(header_block, size, search_block, ctx);
return ctx->found;
}
Index: quoted-printable.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/quoted-printable.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- quoted-printable.c 8 Dec 2002 05:23:08 -0000 1.2
+++ quoted-printable.c 16 Dec 2002 03:26:55 -0000 1.3
@@ -18,10 +18,10 @@
if (src[src_pos] != '_' && src[src_pos] != '=')
continue;
- buffer_append(dest, src, src_pos - next);
+ buffer_append(dest, src + next, src_pos - next);
if (src[src_pos] == '_') {
- buffer_append(dest, " ", 1);
+ buffer_append_c(dest, ' ');
next = src_pos+1;
} else {
/* =<hex> */
@@ -40,6 +40,8 @@
}
}
}
+
+ buffer_append(dest, src + next, src_size - next);
if (src_pos_r != NULL)
*src_pos_r = src_pos;
More information about the dovecot-cvs
mailing list