[dovecot-cvs] dovecot/src/lib-mail Makefile.am, 1.12,
1.13 message-header-parser.c, NONE,
1.1 message-header-parser.h, NONE, 1.1 message-parser.c, 1.71,
1.72 message-parser.h, 1.30, 1.31
tss-movial at dovecot.org
tss-movial at dovecot.org
Mon May 8 11:46:21 EEST 2006
Update of /var/lib/cvs/dovecot/src/lib-mail
In directory talvi:/tmp/cvs-serv713/lib-mail
Modified Files:
Makefile.am message-parser.c message-parser.h
Added Files:
message-header-parser.c message-header-parser.h
Log Message:
Rewrote the message bodystructure parser to allow parsing from non-blocking streams. Also did a couple of API changes and cleanups.
Index: Makefile.am
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/Makefile.am,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -d -r1.12 -r1.13
--- Makefile.am 5 Jun 2005 20:39:53 -0000 1.12
+++ Makefile.am 8 May 2006 08:46:19 -0000 1.13
@@ -11,6 +11,7 @@
message-content-parser.c \
message-date.c \
message-header-decode.c \
+ message-header-parser.c \
message-header-search.c \
message-parser.c \
message-part-serialize.c \
@@ -27,6 +28,7 @@
message-content-parser.h \
message-date.h \
message-header-decode.h \
+ message-header-parser.h \
message-header-search.h \
message-parser.h \
message-part-serialize.h \
--- NEW FILE: message-header-parser.c ---
/* Copyright (C) 2002-2006 Timo Sirainen */
#include "lib.h"
#include "buffer.h"
#include "istream.h"
#include "str.h"
#include "message-size.h"
#include "message-header-parser.h"
/* State of one header parsing session, created by message_parse_header_init()
   and released by message_parse_header_deinit(). */
struct message_header_parser_ctx {
/* the line handed back to the caller through hdr_r; reused for every call */
struct message_header_line line;
/* stream the headers are read from */
struct istream *input;
/* optional (may be NULL) counters updated with the sizes/lines consumed */
struct message_size *hdr_size;
/* copy of the current header name; line.name points into this string */
string_t *name;
/* created on demand; collects the value of a multi-line header when the
   caller has asked for it with line.use_full_value */
buffer_t *value_buf;
/* number of input bytes belonging to the previously returned line; they are
   skipped at the start of the next call or in deinit */
size_t skip;
/* copied from the init parameter; selects how LWSP after ':' is skipped */
unsigned int skip_initial_lwsp:1;
/* set after a header name too large for the input buffer was dropped; the
   rest of that line is discarded as well */
unsigned int skip_line:1;
/* set once a NUL byte has been seen while scanning */
unsigned int has_nuls:1;
};
/* Create a header parser reading from input.

   hdr_size may be NULL; when given it is cleared here and later updated by
   message_parse_header_next(). skip_initial_lwsp is stored as-is and decides
   how whitespace after the ':' is treated.

   The returned context must be released with message_parse_header_deinit(). */
struct message_header_parser_ctx *
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
			  bool skip_initial_lwsp)
{
	/* NOTE(review): the remaining members (skip, value_buf, flags, line)
	   are never assigned here, so this presumably relies on i_new()
	   returning zeroed memory - confirm */
	struct message_header_parser_ctx *parser =
		i_new(struct message_header_parser_ctx, 1);

	/* start the caller's counters from zero */
	if (hdr_size != NULL)
		memset(hdr_size, 0, sizeof(*hdr_size));

	parser->skip_initial_lwsp = skip_initial_lwsp;
	parser->name = str_new(default_pool, 128);
	parser->hdr_size = hdr_size;
	parser->input = input;
	return parser;
}
/* Destroy a header parser and set the caller's pointer to NULL.

   Bytes of the last returned line that are still pending in ctx->skip are
   skipped from the input stream first. The stream itself is not freed
   here. */
void message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
{
	struct message_header_parser_ctx *parser = *_ctx;

	*_ctx = NULL;

	/* consume what the last returned line left unread */
	i_stream_skip(parser->input, parser->skip);

	/* value_buf only exists if a full value was ever requested */
	if (parser->value_buf != NULL)
		buffer_free(parser->value_buf);
	str_free(&parser->name);
	i_free(parser);
}
/* Read the next header line from the parser's input stream.

   On success *hdr_r points to ctx->line, which is reused by every call.
   line->value and line->middle point straight into the stream's buffer
   (the bytes are skipped only on the next call or in deinit); line->name
   is a copy kept in ctx->name. For continuation lines of a multi-line
   header line->continued is set and, if the caller set use_full_value on
   the previous line, full_value holds all lines so far joined with '\n'.
   The final "end of headers" line is returned with line->eoh set.

   Returns 1 if a line was returned, 0 if the stream is non-blocking and
   more data is needed, -1 if the end-of-headers line was already returned
   or the stream gave EOF/error with nothing buffered. *hdr_r is NULL
   unless 1 is returned. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r)
{
	struct message_header_line *line = &ctx->line;
	const unsigned char *msg;
	size_t i, size, startpos, colon_pos, parse_size;
	int ret;
	bool last_no_newline;

	*hdr_r = NULL;
	if (line->eoh)
		return -1;

	/* the previous line's data is no longer needed, drop it */
	if (ctx->skip > 0) {
		i_stream_skip(ctx->input, ctx->skip);
		ctx->skip = 0;
	}

	/* colon_pos == UINT_MAX means "':' not found yet" */
	startpos = 0; colon_pos = UINT_MAX;

	last_no_newline = line->no_newline;
	line->no_newline = FALSE;

	if (line->continues) {
		if (line->use_full_value && !line->continued) {
			/* save the first line */
			if (ctx->value_buf != NULL)
				buffer_set_used_size(ctx->value_buf, 0);
			else {
				ctx->value_buf =
					buffer_create_dynamic(default_pool,
							      4096);
			}
			buffer_append(ctx->value_buf,
				      line->value, line->value_len);
		}

		line->continued = TRUE;
		line->continues = FALSE;
		/* continuation lines have no name, don't look for ':' */
		colon_pos = 0;
	} else {
		/* new header line */
		line->continued = FALSE;
		line->name_offset = ctx->input->v_offset;
	}

	for (;;) {
		ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);
		if (ret > 0) {
			/* we want to know one byte in advance to find out
			   if it's multiline header */
			parse_size = size-1;
		} else {
			parse_size = size;
		}

		/* no new data and everything buffered was already scanned */
		if (ret <= 0 && startpos == size) {
			if (ret == -1) {
				if (startpos > 0) {
					/* header ended unexpectedly. */
					line->no_newline = TRUE;
					ctx->skip = startpos;
					break;
				}
				/* error / EOF with no bytes */
				return -1;
			}

			if (ret == 0 && !ctx->input->eof) {
				/* stream is nonblocking - need more data */
				return 0;
			}

			if (msg[0] == '\n' ||
			    (msg[0] == '\r' && size > 1 && msg[1] == '\n')) {
				/* end of headers - this mostly happens just
				   with mbox where headers are read separately
				   from body */
				size = 0;
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->lines++;
				if (msg[0] == '\r')
					ctx->skip = 2;
				else {
					ctx->skip = 1;
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}
				break;
			}

			/* a) line is larger than input buffer
			   b) header ended unexpectedly */
			if (colon_pos == UINT_MAX && ret == -2 &&
			    !line->continued) {
				/* header name is huge. just skip it. */
				if (msg[size-1] == '\r')
					size--;

				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->physical_size += size;
					ctx->hdr_size->virtual_size += size;
				}
				i_stream_skip(ctx->input, size);
				ctx->skip_line = TRUE;
				startpos = 0;
				continue;
			}

			if (ret == -2) {
				/* go back to last LWSP if found. */
				size_t min_pos =
					!line->continued ? colon_pos : 0;
				for (i = size-1; i > min_pos; i--) {
					if (IS_LWSP(msg[i])) {
						size = i;
						break;
					}
				}

				line->continues = TRUE;
			}
			line->no_newline = TRUE;
			ctx->skip = size;
			break;
		}

		/* find ':' */
		if (colon_pos == UINT_MAX) {
			for (i = startpos; i < parse_size; i++) {
				/* cheap pre-filter: ':', '\n' and '\0' are
				   all <= ':' */
				if (msg[i] <= ':') {
					if (msg[i] == ':') {
						colon_pos = i;
						line->full_value_offset =
							ctx->input->v_offset +
							i + 1;
						break;
					}
					if (msg[i] == '\n') {
						/* end of headers, or error */
						break;
					}

					if (msg[i] == '\0')
						ctx->has_nuls = TRUE;
				}
			}
		}

		/* find '\n' */
		for (i = startpos; i < parse_size; i++) {
			if (msg[i] <= '\n') {
				if (msg[i] == '\n')
					break;
				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		}

		if (i < parse_size) {
			/* got a line */
			if (ctx->skip_line) {
				/* skipping a huge line */
				if (ctx->hdr_size != NULL) {
					ctx->hdr_size->physical_size += i;
					ctx->hdr_size->virtual_size += i;
					if (i == 0 || msg[i-1] != '\r') {
						/* missing CR */
						ctx->hdr_size->virtual_size++;
					}
				}
				/* NOTE(review): only the i bytes before the
				   '\n' are skipped, so the newline itself is
				   still the first byte seen by the next
				   iteration, and colon_pos is not reset -
				   confirm this is intended */
				i_stream_skip(ctx->input, i);
				startpos = 0;
				ctx->skip_line = FALSE;
				continue;
			}
			/* LWSP right after the newline = folded header */
			line->continues = i+1 < size && IS_LWSP(msg[i+1]);

			if (ctx->hdr_size != NULL)
				ctx->hdr_size->lines++;
			if (i == 0 || msg[i-1] != '\r') {
				/* missing CR */
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->virtual_size++;
				size = i;
			} else {
				size = i-1;
			}

			ctx->skip = i+1;
			break;
		}

		/* no newline yet; resume scanning where we stopped */
		startpos = i;
	}

	/* from here on size is the line length without the line terminator */
	if (size == 0) {
		/* end of headers */
		line->eoh = TRUE;
		line->name_len = line->value_len = line->full_value_len = 0;
		line->name = ""; line->value = line->full_value = NULL;
		line->middle = NULL; line->middle_len = 0;
	} else if (line->continued) {
		line->value = msg;
		line->value_len = size;
		line->middle = NULL;
		line->middle_len = 0;
	} else if (colon_pos == UINT_MAX) {
		/* missing ':', assume the whole line is name */
		line->value = NULL;
		line->value_len = 0;

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, size);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);

		line->middle = NULL;
		line->middle_len = 0;
	} else {
		size_t pos;

		line->value = msg + colon_pos+1;
		line->value_len = size - colon_pos - 1;
		if (ctx->skip_initial_lwsp) {
			/* get value. skip all LWSP after ':'. Note that
			   RFC2822 doesn't say we should, but history behind
			   it..

			   Exception to this is if the value consists only of
			   LWSP, then skip only the one LWSP after ':'. */
			for (pos = 0; pos < line->value_len; pos++) {
				/* must test the byte at pos; testing
				   value[0] would never advance past a
				   leading LWSP */
				if (!IS_LWSP(line->value[pos]))
					break;
			}

			if (pos == line->value_len) {
				/* everything was LWSP */
				if (line->value_len > 0 &&
				    IS_LWSP(line->value[0]))
					pos = 1;
			}
		} else {
			pos = line->value_len > 0 &&
				IS_LWSP(line->value[0]) ? 1 : 0;
		}

		line->value += pos;
		line->value_len -= pos;
		line->full_value_offset += pos;

		/* get name, skip LWSP before ':' */
		while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
			colon_pos--;

		/* middle = everything between the name and the value */
		line->middle = msg + colon_pos;
		line->middle_len = (size_t)(line->value - line->middle);

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, colon_pos);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);
	}

	if (!line->continued) {
		/* first header line, set full_value = value */
		line->full_value = line->value;
		line->full_value_len = line->value_len;
	} else if (line->use_full_value) {
		/* continue saving the full value */
		if (!last_no_newline)
			buffer_append_c(ctx->value_buf, '\n');
		buffer_append(ctx->value_buf, line->value, line->value_len);
		line->full_value = buffer_get_data(ctx->value_buf,
						   &line->full_value_len);
	} else {
		/* we didn't want full_value, and this is a continued line. */
		line->full_value = NULL;
		line->full_value_len = 0;
	}

	/* always reset it */
	line->use_full_value = FALSE;

	if (ctx->hdr_size != NULL) {
		ctx->hdr_size->physical_size += ctx->skip;
		ctx->hdr_size->virtual_size += ctx->skip;
	}

	*hdr_r = line;
	return 1;
}
/* Tell whether a NUL byte has been seen in the data scanned so far. */
bool message_parse_header_has_nuls(struct message_header_parser_ctx *ctx)
{
	return ctx->has_nuls != 0;
}
/* Parse all headers from input in one go, calling callback for every header
   line and finally once more with hdr = NULL.

   hdr_size may be NULL. LWSP skipping after ':' is always enabled. The
   parser must never report "need more data" (asserted), so this is meant
   for streams that don't return 0 from reads. */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *parser;
	struct message_header_line *current;
	int ret;

	parser = message_parse_header_init(input, hdr_size, TRUE);
	for (;;) {
		ret = message_parse_header_next(parser, &current);
		if (ret <= 0)
			break;
		callback(current, context);
	}
	/* 0 would mean a non-blocking stream ran dry mid-header */
	i_assert(ret != 0);

	message_parse_header_deinit(&parser);

	/* call after the final skipping */
	callback(NULL, context);
}
--- NEW FILE: message-header-parser.h ---
#ifndef __MESSAGE_HEADER_PARSER_H
#define __MESSAGE_HEADER_PARSER_H
/* Non-zero if c is linear whitespace, i.e. a space or a horizontal tab.
   The argument is evaluated twice, so don't pass expressions with side
   effects. */
#define IS_LWSP(c) \
((c) == ' ' || (c) == '\t')
struct message_size;
struct message_header_parser_ctx;
/* One (physical) header line as returned by message_parse_header_next().
   The same struct is reused for every returned line. */
struct message_header_line {
/* header name without the LWSP preceding ':'; "" for the end-of-headers
   line */
const char *name;
size_t name_len;
/* value of this line only, without the line terminator */
const unsigned char *value;
size_t value_len;
/* for the first line the same as value; for continued lines all lines so
   far joined with '\n' if use_full_value was set, otherwise NULL */
const unsigned char *full_value;
size_t full_value_len;
/* the bytes between name and value (LWSP, ':' and the skipped LWSP);
   NULL for continued lines, lines without ':' and end of headers */
const unsigned char *middle;
size_t middle_len;
/* stream offsets of the name and of the first value byte */
uoff_t name_offset, full_value_offset;
unsigned int continues:1; /* multiline header, continues in next line */
unsigned int continued:1; /* multiline header, continued from previous line */
unsigned int eoh:1; /* "end of headers" line */
unsigned int no_newline:1; /* no \n after this line */
unsigned int use_full_value:1; /* set if you want full_value; reset after every returned line */
};
/* Callback type for message_parse_header(): called for every header line
   and then once with hdr = NULL at the end of headers */
typedef void message_header_callback_t(struct message_header_line *hdr,
void *context);
/* Create a header parser for the given stream. hdr_size may be NULL; if it
   isn't, it's zeroed and updated while parsing.
   skip_initial_lwsp controls if we should skip LWSP after "header: ".
   Note that there may not be the single whitespace after "header:", and that
   "header : " is also possible. These two conditions can't be determined from
   struct message_header_line. */
struct message_header_parser_ctx *
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
bool skip_initial_lwsp);
/* Free the parser and set *ctx to NULL. Data of the last returned line is
   skipped from the stream before freeing. */
void message_parse_header_deinit(struct message_header_parser_ctx **ctx);
/* Read and return next header line. Returns 1 if header is returned, 0 if
   input stream is non-blocking and more data needs to be read, -1 when all is
   done or error occurred (see stream's error status). */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
struct message_header_line **hdr_r);
/* Returns TRUE if the parser has seen NUL characters. */
bool message_parse_header_has_nuls(struct message_header_parser_ctx *ctx);
/* Read and parse the header from the given stream, calling callback for each
   line. Skipping of LWSP after ':' is always enabled. */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
message_header_callback_t *callback, void *context);
#endif
Index: message-parser.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-parser.c,v
retrieving revision 1.71
retrieving revision 1.72
diff -u -d -r1.71 -r1.72
--- message-parser.c 14 Jan 2006 18:47:35 -0000 1.71
+++ message-parser.c 8 May 2006 08:46:19 -0000 1.72
@@ -1,13 +1,14 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2006 Timo Sirainen */
#include "lib.h"
-#include "buffer.h"
#include "istream.h"
-#include "str.h"
#include "strescape.h"
#include "message-content-parser.h"
#include "message-parser.h"
-#include "message-size.h"
[...1348 lines suppressed...]
- buffer_append(ctx->value_buf, line->value, line->value_len);
- line->full_value = buffer_get_data(ctx->value_buf,
- &line->full_value_len);
- } else {
- /* we didn't want full_value, and this is a continued line. */
- line->full_value = NULL;
- line->full_value_len = 0;
- }
-
- /* always reset it */
- line->use_full_value = FALSE;
-
- if (ctx->hdr_size != NULL) {
- ctx->hdr_size->physical_size += ctx->skip;
- ctx->hdr_size->virtual_size += ctx->skip;
- }
-
- *hdr_r = line;
- return 1;
-}
Index: message-parser.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-mail/message-parser.h,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- message-parser.h 14 Jan 2006 18:47:35 -0000 1.30
+++ message-parser.h 8 May 2006 08:46:19 -0000 1.31
@@ -1,11 +1,9 @@
#ifndef __MESSAGE_PARSER_H
#define __MESSAGE_PARSER_H
+#include "message-header-parser.h"
#include "message-size.h"
-#define IS_LWSP(c) \
- ((c) == ' ' || (c) == '\t')
-
enum message_part_flags {
MESSAGE_PART_FLAG_MULTIPART = 0x01,
MESSAGE_PART_FLAG_MULTIPART_DIGEST = 0x02,
@@ -38,47 +36,20 @@
};
struct message_parser_ctx;
-struct message_header_parser_ctx;
-
-struct message_header_line {
- const char *name;
- size_t name_len;
-
- const unsigned char *value;
- size_t value_len;
-
- const unsigned char *full_value;
- size_t full_value_len;
- const unsigned char *middle;
- size_t middle_len;
-
- uoff_t name_offset, full_value_offset;
+struct message_block {
+ /* non-NULL if a header line was read */
+ struct message_header_line *hdr;
- unsigned int continues:1; /* multiline header, continues in next line */
- unsigned int continued:1; /* multiline header, continues */
- unsigned int eoh:1; /* "end of headers" line */
- unsigned int no_newline:1; /* no \n after this line */
- unsigned int use_full_value:1; /* set if you want full_value */
+ /* hdr = NULL, size = 0 block returned at the end of headers */
+ const unsigned char *data;
+ size_t size;
};
/* called once with hdr = NULL at the end of headers */
-typedef void message_header_callback_t(struct message_part *part,
- struct message_header_line *hdr,
- void *context);
-/* called once with size = 0 at the end of message part */
-typedef void message_body_callback_t(struct message_part *part,
- const unsigned char *data, size_t size,
- void *context);
-
-/* callback is called for each field in message header. */
-void message_parse_from_parts(struct message_part *part, struct istream *input,
- message_header_callback_t *callback,
- void *context);
-void message_parse_header(struct message_part *part, struct istream *input,
- struct message_size *hdr_size,
- message_header_callback_t *callback, void *context);
-
+typedef void message_part_header_callback_t(struct message_part *part,
+ struct message_header_line *hdr,
+ void *context);
/* Initialize message parser. part_spool specifies where struct message_parts
are allocated from. */
@@ -86,32 +57,27 @@
message_parser_init(pool_t part_pool, struct istream *input);
struct message_part *message_parser_deinit(struct message_parser_ctx **ctx);
+/* Read the next block of a message. Returns 1 if block is returned, 0 if
+ input stream is non-blocking and more data needs to be read, -1 when all is
+ done or error occurred (see stream's error status). */
+int message_parser_parse_next_block(struct message_parser_ctx *ctx,
+ struct message_block *block_r);
+
/* Read and parse header. */
void message_parser_parse_header(struct message_parser_ctx *ctx,
struct message_size *hdr_size,
- message_header_callback_t *callback,
+ message_part_header_callback_t *callback,
void *context);
/* Read and parse body. If message is a MIME multipart or message/rfc822
message, hdr_callback is called for all headers. body_callback is called
for the body content. */
void message_parser_parse_body(struct message_parser_ctx *ctx,
- message_header_callback_t *hdr_callback,
- message_body_callback_t *body_callback,
+ message_part_header_callback_t *hdr_callback,
void *context);
-/* skip_initial_lwsp controls if we should skip LWSP after "header: ".
- Note that there may not be the single whitespace after "header:", and that
- "header : " is also possible. These two conditions can't be determined from
- struct message_header_line. */
-struct message_header_parser_ctx *
-message_parse_header_init(struct istream *input, struct message_size *hdr_size,
- bool skip_initial_lwsp);
-void message_parse_header_deinit(struct message_header_parser_ctx **ctx);
-
-/* Read and return next header line. Returns 1 if header is returned, 0 if
- input stream is non-blocking and more data needs to be read, -1 when all is
- done or error occurred (see stream's error status). */
-int message_parse_header_next(struct message_header_parser_ctx *ctx,
- struct message_header_line **hdr_r);
+/* callback is called for each field in message header. */
+void message_parse_from_parts(struct message_part *part, struct istream *input,
+ message_part_header_callback_t *callback,
+ void *context);
#endif
More information about the dovecot-cvs
mailing list