[dovecot-cvs] dovecot/src/lib-mail message-body-search.c,1.10,1.11 message-parser.c,1.36,1.37 message-parser.h,1.18,1.19
cras at procontrol.fi
cras at procontrol.fi
Wed Mar 26 19:29:04 EET 2003
Update of /home/cvs/dovecot/src/lib-mail
In directory danu:/tmp/cvs-serv10853/lib-mail
Modified Files:
message-body-search.c message-parser.c message-parser.h
Log Message:
Better handling for multiline headers. Before we skipped headers larger than
input buffer size (8k with read (default), 256k with mmap). The skipping was
also a bit buggy.
Now we parse the lines one at a time. There's also a way to read the header
fully into memory before parsing it, if really needed.
Index: message-body-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-body-search.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- message-body-search.c 20 Jan 2003 14:52:51 -0000 1.10
+++ message-body-search.c 26 Mar 2003 17:29:02 -0000 1.11
@@ -28,7 +28,6 @@
struct part_search_context {
struct body_search_context *body_ctx;
- struct header_search_context *hdr_search_ctx;
struct charset_translation *translation;
buffer_t *decode_buf;
@@ -42,7 +41,6 @@
unsigned int content_unknown:1;
unsigned int content_type_text:1; /* text/any or message/any */
unsigned int ignore_header:1;
- unsigned int found:1;
};
static void parse_content_type(const unsigned char *value, size_t value_len,
@@ -101,47 +99,59 @@
}
}
-static void header_find(struct message_part *part __attr_unused__,
- const unsigned char *name, size_t name_len,
- const unsigned char *value, size_t value_len,
- void *context)
-{
- struct part_search_context *ctx = context;
-
- if (ctx->found)
- return;
-
- if (!ctx->ignore_header) {
- ctx->found = message_header_search(value, value_len,
- ctx->hdr_search_ctx);
- }
-
- if (name_len == 12 && memcasecmp(name, "Content-Type", 12) == 0) {
- message_content_parse_header(value, value_len,
- parse_content_type,
- parse_content_type_param,
- ctx);
- } else if (name_len == 25 &&
- memcasecmp(name, "Content-Transfer-Encoding", 25) == 0) {
- message_content_parse_header(value, value_len,
- parse_content_encoding,
- NULL, ctx);
- }
-}
-
static int message_search_header(struct part_search_context *ctx,
struct istream *input)
{
- ctx->hdr_search_ctx = message_header_search_init(data_stack_pool,
- ctx->body_ctx->key,
- ctx->body_ctx->charset,
- NULL);
+ struct header_search_context *hdr_search_ctx;
+ struct message_header_parser_ctx *hdr_ctx;
+ struct message_header_line *hdr;
+ int found = FALSE;
+
+ hdr_search_ctx = message_header_search_init(data_stack_pool,
+ ctx->body_ctx->key,
+ ctx->body_ctx->charset,
+ NULL);
/* we default to text content-type */
ctx->content_type_text = TRUE;
- message_parse_header(NULL, input, NULL, header_find, ctx);
- return ctx->found;
+ hdr_ctx = message_parse_header_init(input, NULL);
+ while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) {
+ if (!ctx->ignore_header) {
+ if (message_header_search(hdr->value, hdr->value_len,
+ hdr_search_ctx)) {
+ found = TRUE;
+ break;
+ }
+ }
+
+ if (hdr->name_len == 12 &&
+ strcasecmp(hdr->name, "Content-Type") == 0) {
+ if (hdr->continues) {
+ hdr->use_full_value = TRUE;
+ continue;
+ }
+ message_content_parse_header(hdr->full_value,
+ hdr->full_value_len,
+ parse_content_type,
+ parse_content_type_param,
+ ctx);
+ } else if (hdr->name_len == 25 &&
+ strcasecmp(hdr->name,
+ "Content-Transfer-Encoding") == 0) {
+ if (hdr->continues) {
+ hdr->use_full_value = TRUE;
+ continue;
+ }
+ message_content_parse_header(hdr->full_value,
+ hdr->full_value_len,
+ parse_content_encoding,
+ NULL, ctx);
+ }
+ }
+ message_parse_header_deinit(hdr_ctx);
+
+ return found;
}
static int message_search_decoded_block(struct part_search_context *ctx,
Index: message-parser.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.c,v
retrieving revision 1.36
retrieving revision 1.37
diff -u -d -r1.36 -r1.37
--- message-parser.c 25 Feb 2003 19:10:03 -0000 1.36
+++ message-parser.c 26 Mar 2003 17:29:02 -0000 1.37
@@ -1,7 +1,9 @@
/* Copyright (C) 2002 Timo Sirainen */
#include "lib.h"
+#include "buffer.h"
#include "istream.h"
+#include "str.h"
#include "strescape.h"
#include "message-content-parser.h"
#include "message-parser.h"
@@ -27,6 +29,17 @@
void *context;
};
+struct message_header_parser_ctx {
+ struct message_header_line line;
+
+ struct istream *input;
+ struct message_size *hdr_size;
+
+ string_t *name;
+ buffer_t *value_buf;
+ size_t skip;
+};
+
static struct message_part *
message_parse_part(struct istream *input,
struct parser_context *parser_ctx);
@@ -119,28 +132,6 @@
}
}
-static void parse_header_field(struct message_part *part,
- const unsigned char *name, size_t name_len,
- const unsigned char *value, size_t value_len,
- void *context)
-{
- struct parser_context *parser_ctx = context;
-
- /* call the user-defined header parser */
- if (parser_ctx->callback != NULL) {
- parser_ctx->callback(part, name, name_len, value, value_len,
- parser_ctx->context);
- }
-
- if (name_len == 12 && memcasecmp(name, "Content-Type", 12) == 0) {
- /* we need to know the boundary */
- message_content_parse_header(value, value_len,
- parse_content_type,
- parse_content_type_param,
- parser_ctx);
- }
-}
-
static struct message_part *
message_parse_multipart(struct istream *input,
struct parser_context *parser_ctx)
@@ -197,12 +188,38 @@
static struct message_part *
message_parse_part(struct istream *input, struct parser_context *parser_ctx)
{
+ struct message_header_parser_ctx *hdr_ctx;
+ struct message_header_line *hdr;
struct message_part *next_part, *part;
uoff_t hdr_size;
- message_parse_header(parser_ctx->part, input,
- &parser_ctx->part->header_size,
- parse_header_field, parser_ctx);
+ hdr_ctx = message_parse_header_init(input,
+ &parser_ctx->part->header_size);
+ while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) {
+ /* call the user-defined header parser */
+ if (parser_ctx->callback != NULL) {
+ parser_ctx->callback(parser_ctx->part, hdr,
+ parser_ctx->context);
+ }
+
+ if (strcasecmp(hdr->name, "Content-Type") == 0) {
+ if (hdr->continues) {
+ hdr->use_full_value = TRUE;
+ continue;
+ }
+ /* we need to know the boundary */
+ message_content_parse_header(hdr->full_value,
+ hdr->full_value_len,
+ parse_content_type,
+ parse_content_type_param,
+ parser_ctx);
+ }
+ }
+ if (parser_ctx->callback != NULL) {
+ parser_ctx->callback(parser_ctx->part, NULL,
+ parser_ctx->context);
+ }
+ message_parse_header_deinit(hdr_ctx);
i_assert((parser_ctx->part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
@@ -252,23 +269,6 @@
return next_part;
}
-struct message_part *message_parse(pool_t pool, struct istream *input,
- message_header_callback_t *callback,
- void *context)
-{
- struct message_part *part;
- struct parser_context parser_ctx;
-
- memset(&parser_ctx, 0, sizeof(parser_ctx));
- parser_ctx.pool = pool;
- parser_ctx.callback = callback;
- parser_ctx.context = context;
- parser_ctx.part = part = p_new(pool, struct message_part, 1);
-
- message_parse_part(input, &parser_ctx);
- return part;
-}
-
static void message_skip_line(struct istream *input,
struct message_size *msg_size, int skip_lf)
{
@@ -315,135 +315,6 @@
}
}
-void message_parse_header(struct message_part *part, struct istream *input,
- struct message_size *hdr_size,
- message_header_callback_t *callback, void *context)
-{
- const unsigned char *msg;
- size_t i, size, parse_size, startpos, missing_cr_count;
- size_t line_start, colon_pos, end_pos, name_len, value_len;
- int ret;
-
- if (hdr_size != NULL)
- memset(hdr_size, 0, sizeof(struct message_size));
-
- missing_cr_count = startpos = line_start = 0;
- colon_pos = UINT_MAX;
- for (;;) {
- ret = i_stream_read_data(input, &msg, &size, startpos+1);
- if (ret == -2) {
- /* overflow, line is too long. just skip it. */
- i_assert(size > 2);
-
- message_skip_line(input, hdr_size, TRUE);
- startpos = line_start = 0;
- colon_pos = UINT_MAX;
- continue;
- }
-
- if (ret < 0 || (ret <= 0 && size == startpos)) {
- /* EOF and nothing in buffer. the later check is
- needed only when there's no message body */
- break;
- }
-
- parse_size = size <= startpos+1 ? size : size-1;
- for (i = startpos; i < parse_size; i++) {
- if (msg[i] == ':' && colon_pos == UINT_MAX) {
- colon_pos = i;
- continue;
- }
-
- if (msg[i] != '\n')
- continue;
-
- if (hdr_size != NULL)
- hdr_size->lines++;
-
- if (i == 0 || msg[i-1] != '\r') {
- /* missing CR */
- missing_cr_count++;
- }
-
- if (i == 0 || (i == 1 && msg[i-1] == '\r')) {
- /* no headers at all */
- break;
- }
-
- if ((i > 0 && msg[i-1] == '\n') ||
- (i > 1 && msg[i-2] == '\n' && msg[i-1] == '\r')) {
- /* \n\n or \n\r\n - end of headers */
- break;
- }
-
- /* make sure the header doesn't continue to next line */
- if (i+1 == size || !IS_LWSP(msg[i+1])) {
- if (colon_pos != UINT_MAX &&
- colon_pos != line_start &&
- callback != NULL &&
- !IS_LWSP(msg[line_start])) {
- /* we have a valid header line */
-
- /* get length of name-field */
- end_pos = colon_pos-1;
- while (end_pos > line_start &&
- IS_LWSP(msg[end_pos]))
- end_pos--;
- name_len = end_pos - line_start + 1;
-
- /* get length of value field.
- skip all LWSP after ':'. */
- colon_pos++;
- while (colon_pos < i &&
- IS_LWSP(msg[colon_pos]))
- colon_pos++;
- value_len = i - colon_pos;
- if (msg[i-1] == '\r') value_len--;
-
- /* and finally call the function */
- callback(part,
- msg + line_start, name_len,
- msg + colon_pos, value_len,
- context);
- }
-
- colon_pos = UINT_MAX;
- line_start = i+1;
- }
- }
-
- if (i < parse_size) {
- /* end of header */
- startpos = i+1;
- break;
- }
-
- /* leave the last line to buffer */
- if (colon_pos != UINT_MAX)
- colon_pos -= line_start;
- if (hdr_size != NULL)
- hdr_size->physical_size += line_start;
- i_stream_skip(input, line_start);
-
- startpos = i-line_start;
- line_start = 0;
- }
-
- i_stream_skip(input, startpos);
-
- if (hdr_size != NULL) {
- hdr_size->physical_size += startpos;
- hdr_size->virtual_size +=
- hdr_size->physical_size + missing_cr_count;
- i_assert(hdr_size->virtual_size >= hdr_size->physical_size);
- }
-
- if (callback != NULL) {
- /* "end of headers" notify */
- callback(part, NULL, 0, NULL, 0, context);
- }
-}
-
static struct message_boundary *
boundary_find(struct message_boundary *boundaries,
const unsigned char *msg, size_t len)
@@ -615,4 +486,246 @@
}
return boundary == NULL ? NULL : boundary->part;
+}
+
+struct message_part *message_parse(pool_t pool, struct istream *input,
+ message_header_callback_t *callback,
+ void *context)
+{
+ struct message_part *part;
+ struct parser_context parser_ctx;
+
+ memset(&parser_ctx, 0, sizeof(parser_ctx));
+ parser_ctx.pool = pool;
+ parser_ctx.callback = callback;
+ parser_ctx.context = context;
+ parser_ctx.part = part = p_new(pool, struct message_part, 1);
+
+ message_parse_part(input, &parser_ctx);
+ return part;
+}
+
+void message_parse_header(struct message_part *part, struct istream *input,
+ struct message_size *hdr_size,
+ message_header_callback_t *callback, void *context)
+{
+ struct message_header_parser_ctx *hdr_ctx;
+ struct message_header_line *hdr;
+
+ hdr_ctx = message_parse_header_init(input, hdr_size);
+ while ((hdr = message_parse_header_next(hdr_ctx)) != NULL)
+ callback(part, hdr, context);
+ callback(part, NULL, context);
+ message_parse_header_deinit(hdr_ctx);
+}
+
+struct message_header_parser_ctx *
+message_parse_header_init(struct istream *input, struct message_size *hdr_size)
+{
+ struct message_header_parser_ctx *ctx;
+
+ ctx = i_new(struct message_header_parser_ctx, 1);
+ ctx->input = input;
+ ctx->hdr_size = hdr_size;
+ ctx->name = str_new(default_pool, 128);
+
+ if (hdr_size != NULL)
+ memset(hdr_size, 0, sizeof(*hdr_size));
+ return ctx;
+}
+
+void message_parse_header_deinit(struct message_header_parser_ctx *ctx)
+{
+ i_stream_skip(ctx->input, ctx->skip);
+ if (ctx->value_buf != NULL)
+ buffer_free(ctx->value_buf);
+ str_free(ctx->name);
+ i_free(ctx);
+}
+
+struct message_header_line *
+message_parse_header_next(struct message_header_parser_ctx *ctx)
+{
+ struct message_header_line *line = &ctx->line;
+ const unsigned char *msg;
+ size_t i, size, startpos, colon_pos, parse_size;
+ int ret;
+
+ if (line->eoh)
+ return NULL;
+
+ if (ctx->skip > 0) {
+ i_stream_skip(ctx->input, ctx->skip);
+ ctx->skip = 0;
+ }
+
+ startpos = 0; colon_pos = UINT_MAX;
+
+ line->no_newline = FALSE;
+
+ if (line->continues) {
+ if (line->use_full_value && !line->continued) {
+ /* save the first line */
+ if (ctx->value_buf != NULL)
+ buffer_set_used_size(ctx->value_buf, 0);
+ else {
+ ctx->value_buf =
+ buffer_create_dynamic(default_pool,
+ 4096, (size_t)-1);
+ }
+ buffer_append(ctx->value_buf,
+ line->value, line->value_len);
+ }
+
+ line->continued = TRUE;
+ line->continues = FALSE;
+ colon_pos = 0;
+ } else {
+ /* new header line */
+ line->continued = FALSE;
+ }
+
+ for (;;) {
+ ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);
+
+ if (ret != 0) {
+ /* we want to know one byte in advance to find out
+ if it's multiline header */
+ parse_size = size-1;
+ } else {
+ parse_size = size;
+ }
+
+ if (ret <= 0 && (ret != 0 || startpos == size)) {
+ if (ret == -1) {
+ /* error / EOF with no bytes */
+ return NULL;
+ }
+
+ /* a) line is larger than input buffer
+ b) header ended unexpectedly */
+ if (colon_pos == UINT_MAX) {
+ /* header name is huge. just skip it. */
+ message_skip_line(ctx->input, ctx->hdr_size,
+ TRUE);
+ continue;
+ }
+
+ /* go back to last LWSP if found. */
+ for (i = size-1; i > colon_pos; i--) {
+ if (IS_LWSP(msg[i])) {
+ size = i;
+ break;
+ }
+ }
+
+ line->no_newline = TRUE;
+ line->continues = TRUE;
+ ctx->skip = size;
+ break;
+ }
+
+ /* find ':' */
+ if (colon_pos == UINT_MAX) {
+ for (i = startpos; i < parse_size; i++) {
+ if (msg[i] <= ':') {
+ if (msg[i] == ':') {
+ colon_pos = i;
+ break;
+ }
+ if (msg[i] == '\n') {
+ /* end of headers, or error */
+ break;
+ }
+ }
+ }
+ }
+
+ /* find '\n' */
+ for (i = startpos; i < parse_size; i++) {
+ if (msg[i] == '\n')
+ break;
+ }
+
+ if (i < parse_size) {
+ /* got a line */
+ line->continues = i+1 < size && IS_LWSP(msg[i+1]);
+
+ if (ctx->hdr_size != NULL)
+ ctx->hdr_size->lines++;
+ if (i == 0 || msg[i-1] != '\r') {
+ /* missing CR */
+ if (ctx->hdr_size != NULL)
+ ctx->hdr_size->virtual_size++;
+ size = i;
+ } else {
+ size = i-1;
+ }
+
+ ctx->skip = i+1;
+ break;
+ }
+
+ startpos = i;
+ }
+
+ if (size == 0 || (size == 1 && msg[0] == '\r')) {
+ /* end of headers */
+ line->eoh = TRUE;
+ line->name_len = line->value_len = 0;
+ } else if (line->continued) {
+ line->value = msg;
+ line->value_len = size;
+ } else if (colon_pos == UINT_MAX) {
+ /* missing ':', assume the whole line is name */
+ line->value = NULL;
+ line->value_len = 0;
+
+ str_truncate(ctx->name, 0);
+ str_append_n(ctx->name, msg, size);
+ line->name = str_c(ctx->name);
+ line->name_len = str_len(ctx->name);
+ } else {
+ /* get value, skip only first LWSP after ':' */
+ line->value = msg + colon_pos+1;
+ line->value_len = size - colon_pos - 1;
+ if (line->value_len > 0 &&
+ IS_LWSP(line->value[0])) {
+ line->value++;
+ line->value_len--;
+ }
+
+ /* get name, skip LWSP before ':' */
+ while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
+ colon_pos--;
+
+ str_truncate(ctx->name, 0);
+ str_append_n(ctx->name, msg, colon_pos);
+ line->name = str_c(ctx->name);
+ line->name_len = str_len(ctx->name);
+ }
+
+ if (!line->continued) {
+ /* first header line, set full_value = value */
+ line->full_value = line->value;
+ line->full_value_len = line->value_len;
+ } else if (line->use_full_value) {
+ /* continue saving the full value */
+ buffer_append(ctx->value_buf, line->value, line->value_len);
+ line->full_value = buffer_get_data(ctx->value_buf,
+ &line->full_value_len);
+ } else {
+ /* we didn't want full_value, and this is a continued line. */
+ line->full_value = NULL;
+ line->full_value_len = 0;
+ }
+
+ /* always reset it */
+ line->use_full_value = FALSE;
+
+ if (ctx->hdr_size != NULL) {
+ ctx->hdr_size->physical_size += ctx->skip;
+ ctx->hdr_size->virtual_size += ctx->skip;
+ }
+ return line;
}
Index: message-parser.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-mail/message-parser.h,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -d -r1.18 -r1.19
--- message-parser.h 27 Jan 2003 02:00:00 -0000 1.18
+++ message-parser.h 26 Mar 2003 17:29:02 -0000 1.19
@@ -31,27 +31,44 @@
void *context;
};
-/* NOTE: name and value aren't \0-terminated. Also called once at end of
- headers with name_len = value_len = 0. */
+struct message_header_parser_ctx;
+
+struct message_header_line {
+ const char *name;
+ size_t name_len;
+
+ const unsigned char *value;
+ size_t value_len;
+
+ const unsigned char *full_value;
+ size_t full_value_len;
+
+ unsigned int continues:1; /* multiline header, continues in next line */
+ unsigned int continued:1; /* multiline header, continues */
+ unsigned int eoh:1; /* "end of headers" line */
+ unsigned int no_newline:1; /* no \n after this line */
+ unsigned int use_full_value:1; /* set if you want full_value */
+};
+
+/* called once with hdr = NULL at end of headers */
typedef void message_header_callback_t(struct message_part *part,
- const unsigned char *name,
- size_t name_len,
- const unsigned char *value,
- size_t value_len,
+ struct message_header_line *hdr,
void *context);
/* callback is called for each field in message header. */
struct message_part *message_parse(pool_t pool, struct istream *input,
message_header_callback_t *callback,
void *context);
-
-/* Call callback for each field in message header. Fills the hdr_size.
- part can be NULL, just make sure your header function works with it.
- This function doesn't use data stack so your header function may save
- values to it. When finished, input will point to beginning of message
- body. */
void message_parse_header(struct message_part *part, struct istream *input,
struct message_size *hdr_size,
message_header_callback_t *callback, void *context);
+
+struct message_header_parser_ctx *
+message_parse_header_init(struct istream *input, struct message_size *hdr_size);
+void message_parse_header_deinit(struct message_header_parser_ctx *ctx);
+
+/* Read and return next header line. */
+struct message_header_line *
+message_parse_header_next(struct message_header_parser_ctx *ctx);
#endif
More information about the dovecot-cvs
mailing list