dovecot: Handle race conditions in mmap()ed transaction logs bet...

dovecot at dovecot.org dovecot at dovecot.org
Thu Jun 21 03:42:15 EEST 2007


details:   http://hg.dovecot.org/dovecot/rev/0c0a829a9a63
changeset: 5795:0c0a829a9a63
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Jun 21 03:32:23 2007 +0300
description:
Handle race conditions in mmap()ed transaction logs better.

diffstat:

1 file changed, 97 insertions(+), 53 deletions(-)
src/lib-index/mail-transaction-log-file.c |  150 ++++++++++++++++++-----------

diffs (210 lines):

diff -r ea050869097b -r 0c0a829a9a63 src/lib-index/mail-transaction-log-file.c
--- a/src/lib-index/mail-transaction-log-file.c	Thu Jun 21 00:51:34 2007 +0300
+++ b/src/lib-index/mail-transaction-log-file.c	Thu Jun 21 03:32:23 2007 +0300
@@ -691,6 +691,7 @@ mail_transaction_log_file_sync(struct ma
 {
         const struct mail_transaction_header *hdr;
 	const void *data;
+	struct stat st;
 	size_t size, avail;
 	uint32_t trans_size = 0;
 
@@ -705,7 +706,7 @@ mail_transaction_log_file_sync(struct ma
 		trans_size = mail_index_offset_to_uint32(hdr->size);
 		if (trans_size == 0) {
 			/* unfinished */
-			return 0;
+			return 1;
 		}
 		if (trans_size < sizeof(*hdr)) {
 			mail_transaction_log_file_set_corrupted(file,
@@ -732,11 +733,30 @@ mail_transaction_log_file_sync(struct ma
 		   last record's size wasn't valid, we can't know if it will
 		   be updated unless we've locked the log.
 
-		   Without locking we can be sure only if we're not using
-		   mmaping, because with mmaping the data and the file size
-		   can get updated at any time. */
-		if (file->locked ||
-		    (trans_size != 0 && file->mmap_base == NULL)) {
+		   If the last record size was valid but there isn't enough
+		   data for it, it might or might not be an error:
+
+		   1. If we're locked, it's definitely an error.
+		   2. If not and we're using pread()s, it's also an error.
+		   3. If not and we're using mmap()s, we can't be sure yet.
+		   The data might have changed, so we'll need to fstat()
+		   to check if the file size had changed. If it did, return 0
+		   so the caller will re-mmap() and resync the file. */
+		if (file->locked || trans_size != 0) {
+			if (file->mmap_base != NULL && !file->locked) {
+				i_assert(trans_size != 0);
+
+				if (fstat(file->fd, &st) < 0) {
+					mail_index_file_set_syscall_error(
+						file->log->index,
+						file->filepath, "fstat()");
+					return -1;
+				}
+				if ((uoff_t)st.st_size != file->last_size) {
+					file->last_size = st.st_size;
+					return 0;
+				}
+			}
 			if (trans_size != 0) {
 				mail_transaction_log_file_set_corrupted(file,
 					"hdr.size too large (%u)", trans_size);
@@ -759,7 +779,7 @@ mail_transaction_log_file_sync(struct ma
 		return -1;
 	}
 
-	return 0;
+	return 1;
 }
 
 static int
@@ -848,8 +868,10 @@ mail_transaction_log_file_read(struct ma
 		return -1;
 	}
 
-	if (mail_transaction_log_file_sync(file) < 0)
-		return 0;
+	if ((ret = mail_transaction_log_file_sync(file)) <= 0) {
+		i_assert(ret != 0);
+		return 0;
+	}
 
 	i_assert(file->sync_offset >= file->buffer_offset);
 	buffer_set_used_size(file->buffer,
@@ -878,6 +900,54 @@ log_file_map_check_offsets(struct mail_t
 	}
 
 	return 1;
+}
+
+static int
+mail_transaction_log_file_mmap(struct mail_transaction_log_file *file)
+{
+	if (file->buffer != NULL) {
+		/* in case we just switched to mmaping */
+		buffer_free(file->buffer);
+	}
+	file->mmap_size = file->last_size;
+	file->mmap_base = mmap(NULL, file->mmap_size, PROT_READ, MAP_SHARED,
+			       file->fd, 0);
+	if (file->mmap_base == MAP_FAILED) {
+		file->mmap_base = NULL;
+		file->mmap_size = 0;
+		mail_index_file_set_syscall_error(file->log->index,
+						  file->filepath, "mmap()");
+		return -1;
+	}
+
+	if (file->mmap_size > mmap_get_page_size()) {
+		if (madvise(file->mmap_base, file->mmap_size,
+			    MADV_SEQUENTIAL) < 0) {
+			mail_index_file_set_syscall_error(file->log->index,
+				file->filepath, "madvise()");
+		}
+	}
+
+	file->buffer = buffer_create_const_data(default_pool,
+						file->mmap_base,
+						file->mmap_size);
+	file->buffer_offset = 0;
+	return 0;
+}
+
+static void
+mail_transaction_log_file_munmap(struct mail_transaction_log_file *file)
+{
+	if (file->mmap_base == NULL)
+		return;
+
+	if (munmap(file->mmap_base, file->mmap_size) < 0) {
+		mail_index_file_set_syscall_error(file->log->index,
+						  file->filepath, "munmap()");
+	}
+	file->mmap_base = NULL;
+	file->mmap_size = 0;
+	buffer_free(file->buffer);
 }
 
 int mail_transaction_log_file_map(struct mail_transaction_log_file *file,
@@ -947,58 +1017,32 @@ int mail_transaction_log_file_map(struct
 
 		if ((uoff_t)st.st_size == file->mmap_size) {
 			/* we already have the whole file mmaped */
-			if (mail_transaction_log_file_sync(file) < 0)
+			if ((ret = mail_transaction_log_file_sync(file)) < 0)
 				return 0;
-			return log_file_map_check_offsets(file, start_offset,
-							  end_offset);
-		}
-	}
-
-	if (file->mmap_base != NULL) {
-		if (munmap(file->mmap_base, file->mmap_size) < 0) {
-			mail_index_file_set_syscall_error(index, file->filepath,
-							  "munmap()");
-		}
-		file->mmap_base = NULL;
-		file->mmap_size = 0;
-		buffer_free(file->buffer);
+			if (ret > 0) {
+				return log_file_map_check_offsets(file,
+								  start_offset,
+								  end_offset);
+			}
+			/* size changed, re-mmap */
+		}
 	}
 
 	if (index->mmap_disable) {
+		mail_transaction_log_file_munmap(file);
+
 		ret = mail_transaction_log_file_read(file, start_offset);
 		if (ret <= 0)
 			return ret;
 	} else {
-		if (file->buffer != NULL) {
-			/* in case we just switched to mmaping */
-			buffer_free(file->buffer);
-		}
-		file->mmap_size = st.st_size;
-		file->mmap_base = mmap(NULL, file->mmap_size, PROT_READ,
-				       MAP_SHARED, file->fd, 0);
-		if (file->mmap_base == MAP_FAILED) {
-			file->mmap_base = NULL;
-			file->mmap_size = 0;
-			mail_index_file_set_syscall_error(index, file->filepath,
-							  "mmap()");
-			return -1;
-		}
-
-		if (file->mmap_size > mmap_get_page_size()) {
-			if (madvise(file->mmap_base, file->mmap_size,
-				    MADV_SEQUENTIAL) < 0) {
-				mail_index_file_set_syscall_error(index,
-					file->filepath, "madvise()");
-			}
-		}
-
-		file->buffer = buffer_create_const_data(default_pool,
-							file->mmap_base,
-							file->mmap_size);
-		file->buffer_offset = 0;
-
-		if (mail_transaction_log_file_sync(file) < 0)
-			return 0;
+		do {
+			mail_transaction_log_file_munmap(file);
+
+			if (mail_transaction_log_file_mmap(file) < 0)
+				return -1;
+			if ((ret = mail_transaction_log_file_sync(file)) < 0)
+				return 0;
+		} while (ret == 0);
 	}
 
 	return log_file_map_check_offsets(file, start_offset, end_offset);


More information about the dovecot-cvs mailing list