[dovecot-cvs] dovecot/src/lib-storage/index/mbox istream-raw-mbox.c, 1.8, 1.9 istream-raw-mbox.h, 1.4, 1.5 mbox-mail.c, 1.5, 1.6 mbox-sync-rewrite.c, 1.13, 1.14 mbox-sync-update.c, 1.10, 1.11 mbox-sync.c, 1.24, 1.25

cras at procontrol.fi cras at procontrol.fi
Wed Jun 16 08:38:25 EEST 2004


Update of /home/cvs/dovecot/src/lib-storage/index/mbox
In directory talvi:/tmp/cvs-serv6581/lib-storage/index/mbox

Modified Files:
	istream-raw-mbox.c istream-raw-mbox.h mbox-mail.c 
	mbox-sync-rewrite.c mbox-sync-update.c mbox-sync.c 
Log Message:
Complain if file isn't in mbox format. Complain if From-line wasn't found
at the expected location. Parser should now correctly handle any kind of mbox,
no matter how corrupted.



Index: istream-raw-mbox.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/istream-raw-mbox.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- istream-raw-mbox.c	14 Jun 2004 23:35:17 -0000	1.8
+++ istream-raw-mbox.c	16 Jun 2004 05:38:23 -0000	1.9
@@ -14,6 +14,11 @@
 
 	uoff_t from_offset, hdr_offset, body_offset, mail_size;
 	struct istream *input;
+	uoff_t input_peak_offset;
+
+	unsigned int corrupted:1;
+	unsigned int eom:1;
+	unsigned int next_eof:1;
 };
 
 static void _close(struct _iostream *stream __attr_unused__)
@@ -93,6 +98,7 @@
 
 	/* we'll skip over From-line */
 	rstream->istream.istream.v_offset += line_pos+1;
+	i_stream_skip(rstream->input, line_pos+1);
 	rstream->hdr_offset = rstream->istream.istream.v_offset;
 	return 0;
 }
@@ -105,8 +111,19 @@
 	const char *fromp;
 	char *sender, eoh_char;
 	time_t received_time;
-	size_t i, pos, new_pos;
-	ssize_t ret;
+	size_t i, pos, new_pos, from_start_pos;
+	ssize_t ret = 0;
+
+	if (rstream->eom) {
+		if (rstream->body_offset == (uoff_t)-1) {
+			/* missing \n from headers */
+			rstream->body_offset =
+				stream->istream.v_offset +
+				(stream->pos - stream->skip);
+		}
+		stream->istream.eof = rstream->next_eof;
+		return -1;
+	}
 
 	i_stream_seek(rstream->input, stream->istream.v_offset);
 
@@ -117,90 +134,90 @@
 	do {
 		ret = i_stream_read(rstream->input);
 		buf = i_stream_get_data(rstream->input, &pos);
-	} while (ret > 0 && pos <= 6);
+	} while (ret > 0 &&
+		 stream->istream.v_offset + pos <= rstream->input_peak_offset);
 
-	if (pos == 0 || (pos == 1 && buf[0] == '\n')) {
-		/* EOF */
-		stream->pos = 0;
-		stream->istream.eof = TRUE;
-		rstream->mail_size = stream->istream.v_offset -
+	if (ret < 0) {
+		if (ret == -2)
+			return -2;
+
+		/* we've read the whole file, final byte should be
+		   the \n trailer */
+		if (pos > 0 && buf[pos-1] == '\n')
+			pos--;
+
+		i_assert(pos >= stream->pos);
+		ret = pos == stream->pos ? -1 :
+			(ssize_t)(pos - stream->pos);
+
+		stream->buffer = buf;
+		stream->pos = pos;
+
+		rstream->eom = TRUE;
+		rstream->next_eof = TRUE;
+		rstream->mail_size = stream->istream.v_offset + pos -
 			rstream->hdr_offset;
-		return -1;
+		return ret < 0 ? _read(stream) : ret;
 	}
 
 	if (stream->istream.v_offset == rstream->from_offset) {
+		/* beginning of message, we haven't yet read our From-line */
 		if (mbox_read_from_line(rstream) < 0) {
 			stream->pos = 0;
 			stream->istream.eof = TRUE;
+			rstream->corrupted = TRUE;
 			return -1;
 		}
-		return _read(stream);
-	}
-
-	i = 0;
-
-	if (pos >= 31) {
-		if (memcmp(buf, "\nFrom ", 6) == 0) {
-			if (mbox_from_parse(buf+6, pos-6,
-					    &received_time, &sender) == 0) {
-				rstream->next_received_time = received_time;
-				rstream->mail_size = stream->istream.v_offset -
-					rstream->hdr_offset;
-
-				i_free(rstream->next_sender);
-				rstream->next_sender = sender;
-				i_assert(stream->pos == 0);
-				return -1;
-			}
-
-			/* we don't want to get stuck at invalid From-line */
-			i += 6;
-		}
-	} else if (ret == -1) {
-		/* last few bytes, can't contain From-line */
-		if (buf[pos-1] == '\n') {
-			/* last LF doesn't belong to last message */
-			pos--;
-		}
-
-		if (rstream->body_offset == (uoff_t)-1) {
-			/* find body_offset */
-			for (; i < pos; i++) {
-				if (buf[i] == '\n' && i > 0 &&
-				    buf[i-1] == '\n') {
-					rstream->body_offset =
-						stream->istream.v_offset +
-						i + 1;
-					break;
-				}
-			}
-		}
-
-		ret = pos <= stream->pos ? -1 :
-			(ssize_t) (pos - stream->pos);
 
-		rstream->mail_size = stream->istream.v_offset + pos -
-			rstream->hdr_offset;
-
-		stream->buffer = buf;
-		stream->pos = pos;
-		stream->istream.eof = ret == -1;
-		return ret;
+		/* got it. we don't want to return it however,
+		   so start again from headers */
+		buf = i_stream_get_data(rstream->input, &pos);
+		if (pos == 0)
+			return _read(stream);
 	}
 
 	/* See if we have From-line here - note that it works right only
 	   because all characters are different in mbox_from. */
+        fromp = mbox_from; from_start_pos = 0;
 	eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : '\0';
-	for (fromp = mbox_from; i < pos; i++) {
+	for (i = 0; i < pos; i++) {
 		if (buf[i] == eoh_char && i > 0 && buf[i-1] == '\n') {
 			rstream->body_offset = stream->istream.v_offset + i + 1;
 			eoh_char = '\0';
 		}
 		if (buf[i] == *fromp) {
 			if (*++fromp == '\0') {
-				/* potential From-line - stop here */
+				/* potential From-line, see if we have the
+				   rest of the line buffered.
+				   FIXME: if From-line is longer than input
+				   buffer, we break. probably irrelevant.. */
 				i++;
-				break;
+				from_start_pos = i;
+				fromp = mbox_from;
+			} else if (from_start_pos != 0) {
+				/* we have the whole From-line here now.
+				   See if it's a valid one. */
+				if (mbox_from_parse(buf + from_start_pos,
+						    pos - from_start_pos,
+						    &received_time,
+						    &sender) == 0) {
+					/* yep, we stop here. */
+					rstream->next_received_time =
+						received_time;
+					i_free(rstream->next_sender);
+					rstream->next_sender = sender;
+					rstream->eom = TRUE;
+
+                                        /* rewind "\nFrom " */
+					from_start_pos -= 6;
+
+					rstream->mail_size =
+						stream->istream.v_offset +
+						from_start_pos -
+						rstream->hdr_offset;
+					break;
+				}
+				from_start_pos = 0;
 			}
 		} else {
 			fromp = mbox_from;
@@ -208,27 +225,36 @@
 				fromp++;
 		}
 	}
-	new_pos = i - (fromp - mbox_from);
 
-	ret = new_pos <= stream->pos ? -1 :
-		(ssize_t) (pos - stream->pos);
-	stream->buffer = buf;
-	stream->pos = new_pos;
+	/* we want to go at least one byte further next time */
+	rstream->input_peak_offset = stream->istream.v_offset + i;
 
-	if (i < pos) {
-		/* beginning from From-line, try again
-		   FIXME: loops forever if we don't skip forward */
-		ret = 0;
+	if (from_start_pos != 0) {
+		/* we're waiting for the \n at the end of From-line */
+		new_pos = from_start_pos;
+	} else {
+		/* leave out the beginnings of potential From-line */
+		new_pos = i - (fromp - mbox_from);
 	}
+	i_assert(new_pos > stream->pos);
+	ret = new_pos - stream->pos;
 
+	stream->buffer = buf;
+	stream->pos = new_pos;
 	return ret;
 }
 
 static void _seek(struct _istream *stream, uoff_t v_offset)
 {
+	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
+
 	stream->istream.v_offset = v_offset;
 	stream->skip = stream->pos = 0;
 	stream->buffer = NULL;
+
+        rstream->input_peak_offset = 0;
+	rstream->eom = FALSE;
+	rstream->next_eof = FALSE;
 }
 
 struct istream *i_stream_create_raw_mbox(pool_t pool, struct istream *input)
@@ -322,7 +348,7 @@
 	offset = stream->v_offset;
 	i_stream_seek(stream, rstream->hdr_offset);
 	while (rstream->body_offset == (uoff_t)-1) {
-		i_stream_get_data(rstream->input, &pos);
+		i_stream_get_data(stream, &pos);
 		i_stream_skip(stream, pos);
 
 		if (_read(&rstream->istream) < 0)
@@ -409,22 +435,33 @@
 	if (stream->v_offset != rstream->from_offset)
 		i_stream_seek(stream, rstream->from_offset);
 	i_stream_seek(rstream->input, rstream->from_offset);
+
+        rstream->input_peak_offset = 0;
+	rstream->eom = FALSE;
+	rstream->next_eof = FALSE;
 }
 
-void istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
+int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
 {
 	struct raw_mbox_istream *rstream =
 		(struct raw_mbox_istream *)stream->real_stream;
+	int check;
+
+	rstream->corrupted = FALSE;
+	rstream->eom = FALSE;
+	rstream->next_eof = FALSE;
+        rstream->input_peak_offset = 0;
 
 	if (rstream->mail_size != (uoff_t)-1 &&
 	    rstream->hdr_offset + rstream->mail_size == offset) {
 		istream_raw_mbox_next(stream, (uoff_t)-1);
-		return;
+		return 0;
 	}
 
 	if (offset == rstream->from_offset) {
 		/* back to beginning of current message */
 		offset = rstream->hdr_offset;
+		check = offset == 0;
 	} else {
 		rstream->body_offset = (uoff_t)-1;
 		rstream->mail_size = (uoff_t)-1;
@@ -438,10 +475,15 @@
 
                 rstream->from_offset = offset;
 		rstream->hdr_offset = offset;
+		check = TRUE;
 	}
 
 	i_stream_seek(stream, offset);
 	i_stream_seek(rstream->input, offset);
+
+	if (check)
+		(void)_read(&rstream->istream);
+	return rstream->corrupted ? -1 : 0;
 }
 
 void istream_raw_mbox_flush(struct istream *stream)

Index: istream-raw-mbox.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/istream-raw-mbox.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- istream-raw-mbox.h	14 Jun 2004 22:44:56 -0000	1.4
+++ istream-raw-mbox.h	16 Jun 2004 05:38:23 -0000	1.5
@@ -29,8 +29,9 @@
 void istream_raw_mbox_next(struct istream *stream, uoff_t body_size);
 
 /* Seek to message at given offset. offset must point to beginning of
-   "\nFrom ", or 0 for beginning of file. */
-void istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
+   "\nFrom ", or 0 for beginning of file. Returns -1 if the offset doesn't
+   contain a valid From-line. */
+int istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
 
 /* Flush all buffering. Call if you modify the mbox. */
 void istream_raw_mbox_flush(struct istream *stream);

Index: mbox-mail.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/mbox-mail.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- mbox-mail.c	15 Jun 2004 01:15:44 -0000	1.5
+++ mbox-mail.c	16 Jun 2004 05:38:23 -0000	1.6
@@ -16,6 +16,7 @@
 {
 	struct index_mailbox *ibox = mail->ibox;
 	const void *data;
+	uint64_t offset;
 
 	if (ibox->mbox_lock_type == F_UNLCK) {
 		if (mbox_sync(ibox, FALSE, TRUE) < 0)
@@ -34,7 +35,14 @@
 		return -1;
 	}
 
-	istream_raw_mbox_seek(ibox->mbox_stream, *((const uint64_t *)data));
+	offset = *((const uint64_t *)data);
+	if (istream_raw_mbox_seek(ibox->mbox_stream, offset) < 0) {
+		mail_storage_set_critical(ibox->box.storage,
+			"Cached message offset %s is invalid for mbox file %s",
+			dec2str(offset), ibox->path);
+		mail_index_mark_corrupted(ibox->index);
+		return -1;
+	}
 	return 0;
 }
 

Index: mbox-sync-rewrite.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/mbox-sync-rewrite.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- mbox-sync-rewrite.c	14 Jun 2004 23:35:17 -0000	1.13
+++ mbox-sync-rewrite.c	16 Jun 2004 05:38:23 -0000	1.14
@@ -54,6 +54,8 @@
 	const unsigned char *data;
 	void *p;
 
+	i_assert(size < SSIZE_T_MAX);
+
 	/* Append at the end of X-Keywords header,
 	   or X-UID if it doesn't exist */
 	pos = ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] != (size_t)-1 ?
@@ -216,7 +218,7 @@
 	uint32_t old_prev_msg_uid;
 	uoff_t offset;
 
-	i_stream_seek(sync_ctx->file_input, mails[idx].offset);
+	i_stream_seek(sync_ctx->input, mails[idx].offset);
 
 	memset(&mail_ctx, 0, sizeof(mail_ctx));
 	mail_ctx.sync_ctx = sync_ctx;
@@ -228,10 +230,10 @@
 
 	/* mbox_sync_parse_next_mail() checks that UIDs are growing,
 	   so we have to fool it. */
-        old_prev_msg_uid = sync_ctx->prev_msg_uid;
-        sync_ctx->prev_msg_uid = mails[idx].uid-1;
+	old_prev_msg_uid = sync_ctx->prev_msg_uid;
+	sync_ctx->prev_msg_uid = mails[idx].uid-1;
 
-	mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE);
+	mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE);
 	if (mails[idx].space != 0)
 		mbox_sync_update_header_from(&mail_ctx, &mails[idx]);
 	else {
@@ -242,7 +244,7 @@
 	}
 
 	i_assert(mail_ctx.mail.space == mails[idx].space);
-        sync_ctx->prev_msg_uid = old_prev_msg_uid;
+	sync_ctx->prev_msg_uid = old_prev_msg_uid;
 
 	if (mail_ctx.mail.space <= 0)
 		mbox_sync_headers_add_space(&mail_ctx, extra_per_mail);
@@ -257,7 +259,7 @@
 	/* now we have to move it. first move the body of the message,
 	   then write the header and leave the extra space to beginning of
 	   headers. */
-	offset = sync_ctx->file_input->v_offset;
+	offset = sync_ctx->input->v_offset;
 	if (mbox_move(sync_ctx, offset + mails[idx+1].space, offset,
 		      *end_offset - offset - mails[idx+1].space) < 0)
 		return -1;
@@ -284,7 +286,8 @@
 	struct mbox_sync_mail_context mail_ctx;
 	uint32_t old_prev_msg_uid;
 
-	i_stream_seek(sync_ctx->file_input, mails[idx].offset);
+	i_assert(start_offset < end_offset);
+	i_stream_seek(sync_ctx->input, mails[idx].offset);
 
 	memset(&mail_ctx, 0, sizeof(mail_ctx));
 	mail_ctx.sync_ctx = sync_ctx;
@@ -299,12 +302,12 @@
         old_prev_msg_uid = sync_ctx->prev_msg_uid;
         sync_ctx->prev_msg_uid = mails[idx].uid-1;
 
-	mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE);
+	mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE);
 	mbox_sync_update_header_from(&mail_ctx, &mails[idx]);
 
         sync_ctx->prev_msg_uid = old_prev_msg_uid;
 
-	mbox_sync_headers_add_space(&mail_ctx, end_offset - start_offset);
+	mbox_sync_headers_add_space(&mail_ctx,end_offset - start_offset);
 
 	if (pwrite_full(sync_ctx->fd, str_data(mail_ctx.header),
 			str_len(mail_ctx.header), start_offset) < 0) {

Index: mbox-sync-update.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/mbox-sync-update.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- mbox-sync-update.c	14 Jun 2004 22:44:56 -0000	1.10
+++ mbox-sync-update.c	16 Jun 2004 05:38:23 -0000	1.11
@@ -105,6 +105,12 @@
 	old_hdr_size = ctx->body_offset - ctx->hdr_offset;
 	new_hdr_size = str_len(ctx->header);
 
+	if (new_hdr_size > 0 &&
+	    str_data(ctx->header)[new_hdr_size-1] != '\n') {
+		/* broken header - doesn't end with \n. fix it. */
+		str_append_c(ctx->header, '\n');
+	}
+
 	if (ctx->mail.uid == ctx->sync_ctx->first_uid &&
 	    ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] == (size_t)-1) {
 		if (ctx->sync_ctx->base_uid_validity == 0) {

Index: mbox-sync.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/mbox/mbox-sync.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- mbox-sync.c	16 Jun 2004 02:12:00 -0000	1.24
+++ mbox-sync.c	16 Jun 2004 05:38:23 -0000	1.25
@@ -112,8 +112,13 @@
 	/* same as before. we'll have to fix mbox stream to contain
 	   correct from_offset, hdr_offset and body_offset. so, seek
 	   to from_offset and read through the header. */
-	istream_raw_mbox_seek(sync_ctx->input, old_from_offset);
-        (void)istream_raw_mbox_get_body_offset(sync_ctx->input);
+	if (istream_raw_mbox_seek(sync_ctx->input, old_from_offset) < 0) {
+		mail_storage_set_critical(ibox->box.storage,
+			"Message offset %s changed unexpectedly for mbox file "
+			"%s", dec2str(old_from_offset), sync_ctx->ibox->path);
+		return 0;
+	}
+	(void)istream_raw_mbox_get_body_offset(sync_ctx->input);
 	i_stream_seek(sync_ctx->input, old_offset);
 	return 1;
 }
@@ -299,8 +304,8 @@
 	if (rec != NULL && rec->uid != uid) {
 		/* new UID in the middle of the mailbox - shouldn't happen */
 		mail_storage_set_critical(sync_ctx->ibox->box.storage,
-			"mbox sync: UID inserted in the middle of mailbox "
-			"(%u > %u)", rec->uid, uid);
+			"mbox sync: UID inserted in the middle of mailbox %s "
+			"(%u > %u)", sync_ctx->ibox->path, rec->uid, uid);
 		mail_index_mark_corrupted(sync_ctx->ibox->index);
 		return -1;
 	}
@@ -620,7 +625,13 @@
 
         /* set to -1, since they're always increased later */
 	sync_ctx->seq = sync_ctx->idx_seq = seq-1;
-	istream_raw_mbox_seek(sync_ctx->input, offset);
+	if (istream_raw_mbox_seek(sync_ctx->input, offset) < 0) {
+		mail_storage_set_critical(sync_ctx->ibox->box.storage,
+			"Cached message offset %s is invalid for mbox file %s",
+			dec2str(offset), sync_ctx->ibox->path);
+		mail_index_mark_corrupted(sync_ctx->ibox->index);
+		return -1;
+	}
         (void)istream_raw_mbox_get_body_offset(sync_ctx->input);
 	return 0;
 }
@@ -634,9 +645,14 @@
 	uoff_t offset;
 	int ret, expunged;
 
-	if (min_message_count != 0)
-		istream_raw_mbox_seek(sync_ctx->input, 0);
-	else {
+	if (min_message_count != 0) {
+		if (istream_raw_mbox_seek(sync_ctx->input, 0) < 0) {
+			/* doesn't begin with a From-line */
+			mail_storage_set_error(sync_ctx->ibox->box.storage,
+				"Mailbox isn't a valid mbox file");
+			return -1;
+		}
+	} else {
 		/* we sync only what we need to. jump to first record that
 		   needs updating */
 		if (sync_ctx->sync_rec.uid1 == 0) {



More information about the dovecot-cvs mailing list