dovecot-1.1: mbox: Don't stop at From_-lines in the message bodies.
dovecot at dovecot.org
dovecot at dovecot.org
Sun May 4 16:58:01 EEST 2008
details: http://hg.dovecot.org/dovecot-1.1/rev/7871b6219480
changeset: 7479:7871b6219480
user: Timo Sirainen <tss at iki.fi>
date: Sun May 04 16:57:58 2008 +0300
description:
mbox: Don't stop at From_-lines in the message bodies. Use Content-Length:
header to figure out if it belongs to a message body or not.
diffstat:
3 files changed, 141 insertions(+), 75 deletions(-)
src/lib-storage/index/mbox/istream-raw-mbox.c | 91 ++++++++++++++-------
src/lib-storage/index/mbox/istream-raw-mbox.h | 19 ++--
src/lib-storage/index/mbox/mbox-mail.c | 106 ++++++++++++++++---------
diffs (truncated from 335 to 300 lines):
diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/istream-raw-mbox.c
--- a/src/lib-storage/index/mbox/istream-raw-mbox.c Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/istream-raw-mbox.c Sun May 04 16:57:58 2008 +0300
@@ -238,15 +238,19 @@ static ssize_t i_stream_raw_mbox_read(st
FIXME: if From-line is longer than input
buffer, we break. probably irrelevant.. */
i++;
- from_after_pos = i;
- from_start_pos = i - 6;
- if (from_start_pos > 0 &&
- buf[from_start_pos-1] == '\r') {
- /* CR also belongs to it. */
- crlf_ending = TRUE;
- from_start_pos--;
- } else {
- crlf_ending = FALSE;
+ if (rstream->hdr_offset + rstream->mail_size ==
+ stream->istream.v_offset + i - 6 ||
+ rstream->mail_size == (uoff_t)-1) {
+ from_after_pos = i;
+ from_start_pos = i - 6;
+ if (from_start_pos > 0 &&
+ buf[from_start_pos-1] == '\r') {
+ /* CR also belongs to it. */
+ crlf_ending = TRUE;
+ from_start_pos--;
+ } else {
+ crlf_ending = FALSE;
+ }
}
fromp = mbox_from;
} else if (from_start_pos != (size_t)-1) {
@@ -290,6 +294,17 @@ static ssize_t i_stream_raw_mbox_read(st
new_pos--;
}
+ if (stream->istream.v_offset -
+ rstream->hdr_offset + new_pos > rstream->mail_size) {
+ /* istream_raw_mbox_set_next_offset() used invalid
+ cached next_offset? */
+ i_error("Unexpectedly lost From-line at %"PRIuUOFF_T,
+ rstream->hdr_offset + rstream->mail_size);
+ rstream->eof = TRUE;
+ rstream->corrupted = TRUE;
+ return -1;
+ }
+
stream->buffer = buf;
if (new_pos == stream->pos) {
if (stream->istream.eof || ret > 0)
@@ -379,8 +394,7 @@ static int istream_raw_mbox_is_valid_fro
char *sender;
/* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
- if (i_stream_read_data(rstream->istream.parent, &data, &size, 30) == -1)
- return -1;
+ (void)i_stream_read_data(rstream->istream.parent, &data, &size, 30);
if ((size == 1 && data[0] == '\n') ||
(size == 2 && data[0] == '\r' && data[1] == '\n')) {
@@ -469,33 +483,41 @@ uoff_t istream_raw_mbox_get_body_offset(
return rstream->body_offset;
}
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size)
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+ uoff_t expected_body_size)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
const unsigned char *data;
size_t size;
- uoff_t old_offset;
+ uoff_t old_offset, body_size;
i_assert(rstream->hdr_offset != (uoff_t)-1);
i_assert(rstream->body_offset != (uoff_t)-1);
- if (rstream->mail_size != (uoff_t)-1) {
- return rstream->mail_size -
- (rstream->body_offset - rstream->hdr_offset);
- }
-
+ body_size = rstream->mail_size == (uoff_t)-1 ? (uoff_t)-1 :
+ rstream->mail_size - (rstream->body_offset -
+ rstream->hdr_offset);
old_offset = stream->v_offset;
- if (body_size != (uoff_t)-1) {
+ if (expected_body_size != (uoff_t)-1) {
+ /* if we already have the existing body size, use it as long as
+ it's >= expected body_size. otherwise the previous parsing
+ may have stopped at a From_-line that belongs to the body. */
+ if (body_size != (uoff_t)-1 && body_size >= expected_body_size)
+ return body_size;
+
i_stream_seek(rstream->istream.parent,
- rstream->body_offset + body_size);
+ rstream->body_offset + expected_body_size);
if (istream_raw_mbox_is_valid_from(rstream) > 0) {
- rstream->mail_size = body_size +
+ rstream->mail_size = expected_body_size +
(rstream->body_offset - rstream->hdr_offset);
i_stream_seek(stream, old_offset);
- return body_size;
- }
- }
+ return expected_body_size;
+ }
+ /* invalid expected_body_size */
+ }
+ if (body_size != (uoff_t)-1)
+ return body_size;
/* have to read through the message body */
while (i_stream_read_data(stream, &data, &size, 0) > 0)
@@ -535,12 +557,13 @@ bool istream_raw_mbox_has_crlf_ending(st
return rstream->crlf_ending;
}
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
-{
- struct raw_mbox_istream *rstream =
- (struct raw_mbox_istream *)stream->real_stream;
-
- body_size = istream_raw_mbox_get_body_size(stream, body_size);
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size)
+{
+ struct raw_mbox_istream *rstream =
+ (struct raw_mbox_istream *)stream->real_stream;
+ uoff_t body_size;
+
+ body_size = istream_raw_mbox_get_body_size(stream, expected_body_size);
rstream->mail_size = (uoff_t)-1;
rstream->received_time = rstream->next_received_time;
@@ -606,6 +629,14 @@ int istream_raw_mbox_seek(struct istream
return rstream->corrupted ? -1 : 0;
}
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset)
+{
+ struct raw_mbox_istream *rstream =
+ (struct raw_mbox_istream *)stream->real_stream;
+
+ rstream->mail_size = offset - rstream->hdr_offset;
+}
+
bool istream_raw_mbox_is_eof(struct istream *stream)
{
struct raw_mbox_istream *rstream =
diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/istream-raw-mbox.h
--- a/src/lib-storage/index/mbox/istream-raw-mbox.h Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/istream-raw-mbox.h Sun May 04 16:57:58 2008 +0300
@@ -12,10 +12,11 @@ uoff_t istream_raw_mbox_get_header_offse
/* Return offset to beginning of the body. */
uoff_t istream_raw_mbox_get_body_offset(struct istream *stream);
-/* Return the number of bytes in the body of this message. If body_size isn't
- (uoff_t)-1, we'll use it as potentially valid body size to avoid actually
- reading through the whole message. */
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size);
+/* Return the number of bytes in the body of this message. If
+ expected_body_size isn't (uoff_t)-1, we'll use it as potentially valid body
+ size to avoid actually reading through the whole message. */
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+ uoff_t expected_body_size);
/* Return received time of current message, or (time_t)-1 if the timestamp is
broken. */
@@ -26,14 +27,18 @@ const char *istream_raw_mbox_get_sender(
/* Return TRUE if the empty line between this and the next mail contains CR. */
bool istream_raw_mbox_has_crlf_ending(struct istream *stream);
-/* Jump to next message. If body_size isn't (uoff_t)-1, we'll use it as
- potentially valid body size. */
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size);
+/* Jump to next message. If expected_body_size isn't (uoff_t)-1, we'll use it
+ as potentially valid body size. */
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size);
/* Seek to message at given offset. offset must point to beginning of
"\nFrom ", or 0 for beginning of file. Returns -1 if it offset doesn't
contain a valid From-line. */
int istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
+/* Set next message's start offset. If this isn't set, read stops at the next
+ valid From_-line, even if it belongs to the current message's body
+ (Content-Length: header can be used to determine that). */
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset);
/* Returns TRUE if we've read the whole mbox. */
bool istream_raw_mbox_is_eof(struct istream *stream);
diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/mbox-mail.c
--- a/src/lib-storage/index/mbox/mbox-mail.c Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/mbox-mail.c Sun May 04 16:57:58 2008 +0300
@@ -166,12 +166,37 @@ mbox_mail_get_special(struct mail *_mail
return index_mail_get_special(_mail, field, value_r);
}
+static bool
+mbox_mail_get_next_offset(struct index_mail *mail, uoff_t *next_offset_r)
+{
+ struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
+ struct mail *_mail = &mail->mail.mail;
+ const struct mail_index_header *hdr;
+
+ hdr = mail_index_get_header(mail->trans->trans_view);
+ if (_mail->seq >= hdr->messages_count) {
+ if (_mail->seq != hdr->messages_count) {
+ /* we're appending a new message */
+ return FALSE;
+ }
+
+ /* last message, use the synced mbox size */
+ int trailer_size;
+
+ trailer_size = (mbox->storage->storage.flags &
+ MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ? 2 : 1;
+ *next_offset_r = hdr->sync_size - trailer_size;
+ return TRUE;
+ }
+ return mbox_file_lookup_offset(mbox, mail->trans->trans_view,
+ _mail->seq + 1, next_offset_r);
+}
+
static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r)
{
struct index_mail *mail = (struct index_mail *)_mail;
struct index_mail_data *data = &mail->data;
struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
- const struct mail_index_header *hdr;
struct istream *input;
struct message_size hdr_size;
uoff_t old_offset, body_offset, body_size, next_offset;
@@ -194,26 +219,10 @@ static int mbox_mail_get_physical_size(s
/* use the next message's offset to avoid reading through the entire
message body to find out its size */
- hdr = mail_index_get_header(mail->trans->trans_view);
- if (_mail->seq >= hdr->messages_count) {
- if (_mail->seq == hdr->messages_count) {
- /* last message, use the synced mbox size */
- int trailer_size;
-
- trailer_size = (mbox->storage->storage.flags &
- MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ?
- 2 : 1;
- body_size = hdr->sync_size - body_offset - trailer_size;
- } else {
- /* we're appending a new message */
- body_size = (uoff_t)-1;
- }
- } else if (mbox_file_lookup_offset(mbox, mail->trans->trans_view,
- _mail->seq + 1, &next_offset) > 0) {
+ if (mbox_mail_get_next_offset(mail, &next_offset))
body_size = next_offset - body_offset;
- } else {
+ else
body_size = (uoff_t)-1;
- }
/* verify that the calculated body size is correct */
body_size = istream_raw_mbox_get_body_size(mbox->mbox_stream,
@@ -223,6 +232,42 @@ static int mbox_mail_get_physical_size(s
*size_r = data->physical_size;
i_stream_seek(input, old_offset);
+ return 0;
+}
+
+static int mbox_mail_init_stream(struct index_mail *mail)
+{
+ struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
+ struct istream *raw_stream;
+ uoff_t hdr_offset, next_offset;
+
+ if (mbox_mail_seek(mail) < 0)
+ return -1;
+
+ if (!mbox_mail_get_next_offset(mail, &next_offset)) {
+ if (mbox_mail_seek(mail) < 0)
+ return -1;
+ if (!mbox_mail_get_next_offset(mail, &next_offset)) {
+ i_warning("mbox %s: Can't find next message offset",
+ mbox->path);
+ next_offset = (uoff_t)-1;
+ }
+ }
+
+ raw_stream = mbox->mbox_stream;
+ hdr_offset = istream_raw_mbox_get_header_offset(raw_stream);
+ i_stream_seek(raw_stream, hdr_offset);
+
+ if (next_offset != (uoff_t)-1)
+ istream_raw_mbox_set_next_offset(raw_stream, next_offset);
+
+ raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1);
+ mail->data.stream =
+ i_stream_create_header_filter(raw_stream,
+ HEADER_FILTER_EXCLUDE | HEADER_FILTER_NO_CR,
+ mbox_hide_headers, mbox_hide_headers_count,
More information about the dovecot-cvs
mailing list