[dovecot-cvs] dovecot/src/lib-index mail-index-lock.c, 1.1, 1.2 mail-index-private.h, 1.1, 1.2 mail-index-reset.c, 1.1, 1.2 mail-index-sync-update.c, 1.1, 1.2 mail-index-sync.c, 1.1, 1.2 mail-index-transaction-private.h, 1.1, 1.2 mail-index-transaction.c, 1.1, 1.2 mail-index-view-private.h, 1.1, 1.2 mail-index-view-sync.c, 1.1, 1.2 mail-index-view.c, 1.1, 1.2 mail-index.c, 1.106, 1.107 mail-index.h, 1.98, 1.99 mail-transaction-log.c, 1.1, 1.2

cras at procontrol.fi cras at procontrol.fi
Wed Apr 28 03:21:03 EEST 2004


Update of /home/cvs/dovecot/src/lib-index
In directory talvi:/tmp/cvs-serv32334/lib-index

Modified Files:
	mail-index-lock.c mail-index-private.h mail-index-reset.c 
	mail-index-sync-update.c mail-index-sync.c 
	mail-index-transaction-private.h mail-index-transaction.c 
	mail-index-view-private.h mail-index-view-sync.c 
	mail-index-view.c mail-index.c mail-index.h 
	mail-transaction-log.c 
Log Message:
Enforce correct locking by mprotect()ing the index file. Added support for
disabling mmap for indexes, and for disabling just mmap+write().



Index: mail-index-lock.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-lock.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-lock.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-lock.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -28,6 +28,7 @@
 */
 
 #include "lib.h"
+#include "mmap-util.h"
 #include "file-lock.h"
 #include "write-full.h"
 #include "mail-index-private.h"
@@ -46,7 +47,7 @@
 		mail_index_set_syscall_error(index, "close()");
 	index->fd = fd;
 
-	ret = fd < 0 ? mail_index_try_open(index) :
+	ret = fd < 0 ? mail_index_try_open(index, NULL) :
 		mail_index_map(index, FALSE);
 	if (ret <= 0) {
 		// FIXME: serious problem, we'll just crash later..
@@ -79,7 +80,6 @@
 			   unsigned int timeout_secs, int update_index,
 			   unsigned int *lock_id_r)
 {
-	// FIXME: mprotect() the index to make sure we don't access it unlocked!
 	int ret;
 
 	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
@@ -147,6 +147,17 @@
 		index->excl_lock_count++;
 		*lock_id_r = index->lock_id + 1;
 	}
+
+	if (index->map != NULL &&
+	    !MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
+		int prot = PROT_READ | (lock_type == F_WRLCK ? PROT_WRITE : 0);
+		if (mprotect(index->map->mmap_base,
+			     index->map->file_size, prot) < 0) {
+			mail_index_set_syscall_error(index, "mprotect()");
+			return -1;
+		}
+	}
+
 	return 1;
 }
 
@@ -178,9 +189,10 @@
 	if (fd == -1)
 		return -1;
 
-	ret = 0;
-	if (write_full(fd, index->map->mmap_base,
-		       index->map->mmap_used_size) < 0) {
+	ret = write_full(fd, index->map->hdr, sizeof(*index->map->hdr));
+	if (ret < 0 || write_full(fd, index->map->records,
+				  index->map->records_count *
+				  sizeof(struct mail_index_record)) < 0) {
 		mail_index_file_set_syscall_error(index, path, "write_full()");
 		(void)close(fd);
 		(void)unlink(path);
@@ -209,6 +221,43 @@
 	return 1;
 }
 
+static int mail_index_lock_exclusive_copy(struct mail_index *index)
+{
+	int fd;
+
+	i_assert(index->log_locked);
+
+	if (index->copy_lock_path != NULL) {
+		index->excl_lock_count++;
+		return 1;
+	}
+
+	/* copy the index to index.tmp and use it. when */
+	fd = mail_index_copy(index);
+	if (fd == -1)
+		return -1;
+
+	index->lock_type = F_WRLCK;
+        index->excl_lock_count++;
+
+	if (mail_index_reopen(index, fd) < 0) {
+		i_assert(index->excl_lock_count == 1);
+		i_free(index->copy_lock_path);
+		index->copy_lock_path = NULL;
+
+		/* go back to old index */
+		(void)mail_index_reopen(index, -1);
+
+		index->lock_type = F_UNLCK;
+		index->excl_lock_count = 0;
+		index->shared_lock_count = 0;
+		return -1;
+	}
+
+        i_assert(index->excl_lock_count == 1);
+	return 1;
+}
+
 int mail_index_lock_exclusive(struct mail_index *index,
 			      uint32_t log_file_seq, uoff_t log_file_offset,
 			      unsigned int *lock_id_r)
@@ -253,43 +302,18 @@
 	return mail_index_lock_exclusive_copy(index);
 }
 
-int mail_index_lock_exclusive_copy(struct mail_index *index)
-{
-	int fd;
-
-	if (index->copy_lock_path != NULL) {
-		index->excl_lock_count++;
-		return 1;
-	}
-
-	/* copy the index to index.tmp and use it. when */
-	fd = mail_index_copy(index);
-	if (fd == -1)
-		return -1;
-
-	if (mail_index_reopen(index, fd) < 0) {
-		(void)mail_index_reopen(index, -1);
-		i_free(index->copy_lock_path);
-		index->copy_lock_path = NULL;
-		return -1;
-	}
-
-	index->lock_type = F_WRLCK;
-        index->excl_lock_count++;
-	return 1;
-}
-
-static void mail_index_copy_lock_finish(struct mail_index *index)
+static int mail_index_copy_lock_finish(struct mail_index *index)
 {
 	if (fsync(index->fd) < 0) {
 		mail_index_file_set_syscall_error(index, index->copy_lock_path,
 						  "fsync()");
+		return -1;
 	}
 
 	if (rename(index->copy_lock_path, index->filepath) < 0) {
 		mail_index_set_error(index, "rename(%s, %s) failed: %m",
 				     index->copy_lock_path, index->filepath);
-		// FIXME: this isn't good
+		return -1;
 	}
 
 	i_free(index->copy_lock_path);
@@ -298,15 +322,33 @@
 	index->shared_lock_count = 0;
 	index->lock_id += 2;
 	index->lock_type = F_UNLCK;
+	return 0;
 }
 
 void mail_index_unlock(struct mail_index *index, unsigned int lock_id)
 {
-	if (index->copy_lock_path != NULL) {
+	if (index->copy_lock_path != NULL ||
+	    (index->map != NULL && index->map->write_to_disk)) {
 		i_assert(index->log_locked);
 		i_assert(index->excl_lock_count > 0);
-		if (--index->excl_lock_count == 0)
-			mail_index_copy_lock_finish(index);
+		i_assert(lock_id == index->lock_id+1);
+
+		if (--index->excl_lock_count == 0) {
+			if (index->map != NULL && index->map->write_to_disk) {
+				if (index->copy_lock_path != NULL) {
+					/* new mapping replaces the old */
+					(void)unlink(index->copy_lock_path);
+                                        i_free(index->copy_lock_path);
+                                        index->copy_lock_path = NULL;
+				}
+				if (mail_index_copy(index) < 0) {
+					mail_index_set_inconsistent(index);
+					return;
+				}
+			}
+			if (mail_index_copy_lock_finish(index) < 0)
+				mail_index_set_inconsistent(index);
+		}
 		return;
 	}
 
@@ -326,6 +368,12 @@
 	if (index->shared_lock_count == 0 && index->excl_lock_count == 0) {
 		index->lock_id += 2;
 		index->lock_type = F_UNLCK;
+		if (index->map != NULL) {
+			if (mprotect(index->map->mmap_base,
+				     index->map->file_size, PROT_NONE) < 0)
+				mail_index_set_syscall_error(index,
+							     "mprotect()");
+		}
 		if (file_wait_lock(index->fd, F_UNLCK) < 0)
 			mail_index_set_syscall_error(index, "file_wait_lock()");
 	}

Index: mail-index-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-private.h	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-private.h	28 Apr 2004 00:21:00 -0000	1.2
@@ -18,6 +18,10 @@
 #define INDEX_COMPRESS_PERCENTAGE 50
 /* Compress the file when searching deleted records tree has to go this deep */
 #define INDEX_COMPRESS_DEPTH 10
+/* How many times to retry opening index files if read/fstat returns ESTALE.
+   This happens with NFS when the file has been deleted (ie. index file was
+   rewritten by another computer than us). */
+#define INDEX_ESTALE_RETRY_COUNT 10
 
 enum mail_index_mail_flags {
 	MAIL_INDEX_MAIL_FLAG_DIRTY = 0x80,
@@ -36,11 +40,15 @@
 	unsigned int records_count;
 
 	void *mmap_base;
-	size_t mmap_size, mmap_used_size;
+	size_t file_size, file_used_size;
 
 	buffer_t *buffer;
 
-        struct mail_index_header hdr_copy;
+	uint32_t log_file_seq;
+	uoff_t log_file_offset;
+
+	struct mail_index_header hdr_copy;
+	unsigned int write_to_disk:1;
 };
 
 struct mail_index {
@@ -69,7 +77,8 @@
 
 	unsigned int opened:1;
 	unsigned int log_locked:1;
-	unsigned int use_mmap:1;
+	unsigned int mmap_disable:1;
+	unsigned int mmap_no_write:1;
 	unsigned int readonly:1;
 	unsigned int fsck:1;
 };
@@ -79,7 +88,7 @@
 			    const struct mail_index_header *hdr);
 
 int mail_index_create(struct mail_index *index, struct mail_index_header *hdr);
-int mail_index_try_open(struct mail_index *index);
+int mail_index_try_open(struct mail_index *index, unsigned int *lock_id_r);
 int mail_index_create_tmp_file(struct mail_index *index, const char **path_r);
 
 /* Returns 0 = ok, -1 = error. If update_index is TRUE, reopens the index
@@ -91,7 +100,6 @@
 int mail_index_lock_exclusive(struct mail_index *index,
 			      uint32_t log_file_seq, uoff_t log_file_offset,
 			      unsigned int *lock_id_r);
-int mail_index_lock_exclusive_copy(struct mail_index *index);
 void mail_index_unlock(struct mail_index *index, unsigned int lock_id);
 /* Returns 1 if given lock_id is valid, 0 if not. */
 int mail_index_is_locked(struct mail_index *index, unsigned int lock_id);
@@ -118,6 +126,7 @@
 			    const struct mail_transaction_header *hdr,
 			    const void *data, size_t *data_offset);
 
+void mail_index_set_inconsistent(struct mail_index *index);
 int mail_index_mark_corrupted(struct mail_index *index);
 
 int mail_index_set_error(struct mail_index *index, const char *fmt, ...)

Index: mail-index-reset.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-reset.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-reset.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-reset.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -10,9 +10,6 @@
 {
 	struct mail_index_header hdr;
 
-	/* this invalidates all views even if we fail later */
-	index->indexid = 0;
-
 	if (mail_index_mark_corrupted(index) < 0)
 		return -1;
 

Index: mail-index-sync-update.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-sync-update.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-sync-update.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-sync-update.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -97,24 +97,31 @@
 
 static int mail_index_grow(struct mail_index *index, unsigned int count)
 {
-	size_t size, mmap_used_size;
+	struct mail_index_map *map = index->map;
+	size_t size, file_used_size;
 	unsigned int records_count;
 
+	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
+		(void)buffer_append_space_unsafe(map->buffer,
+			count * sizeof(struct mail_index_record));
+		return 0;
+	}
+
 	// FIXME: grow exponentially
-	size = index->map->mmap_used_size +
+	size = map->file_used_size +
 		count * sizeof(struct mail_index_record);
 	if (file_set_size(index->fd, (off_t)size) < 0)
 		return mail_index_set_syscall_error(index, "file_set_size()");
 
-	records_count = index->map->records_count;
-	mmap_used_size = index->map->mmap_used_size;
+	records_count = map->records_count;
+	file_used_size = map->file_used_size;
 
 	if (mail_index_map(index, TRUE) <= 0)
 		return -1;
 
-	i_assert(index->map->mmap_size >= size);
-	index->map->records_count = records_count;
-	index->map->mmap_used_size = mmap_used_size;
+	i_assert(map->file_size >= size);
+	map->records_count = records_count;
+	map->file_used_size = file_used_size;
 	return 0;
 }
 
@@ -127,20 +134,10 @@
 	size_t space;
 	uint32_t next_uid;
 
-	if (!ctx->index->use_mmap) {
-		// FIXME
-	}
-
-	space = (map->mmap_size - map->mmap_used_size) / sizeof(*appends);
+	space = (map->file_size - map->file_used_size) / sizeof(*appends);
 	if (space < count) {
 		if (mail_index_grow(ctx->index, count) < 0)
 			return -1;
-
-		if (mprotect(map->mmap_base, map->mmap_size,
-			     PROT_READ|PROT_WRITE) < 0) {
-			mail_index_set_syscall_error(ctx->index, "mprotect()");
-			return -1;
-		}
 	}
 
 	next_uid = ctx->hdr.next_uid;
@@ -165,7 +162,7 @@
 	memcpy(map->records + map->records_count, appends,
 	       count * sizeof(*appends));
 	map->records_count += count;
-	map->mmap_used_size += count * sizeof(struct mail_index_record);
+	map->file_used_size += count * sizeof(struct mail_index_record);
 	return 0;
 }
 
@@ -179,16 +176,21 @@
 	unsigned int append_count;
 	uint32_t count, file_seq, src_idx, dest_idx;
 	uoff_t file_offset;
-	int ret, locked = FALSE;
-
-	if (mprotect(map->mmap_base, map->mmap_size, PROT_READ|PROT_WRITE) < 0)
-		return mail_index_set_syscall_error(index, "mprotect()");
+	int ret;
 
 	/* rewind */
 	sync_ctx->update_idx = sync_ctx->expunge_idx = 0;
 	sync_ctx->sync_appends =
 		buffer_get_used_size(sync_ctx->appends_buf) != 0;
 
+	if (!mail_index_sync_have_more(sync_ctx)) {
+		/* nothing to sync */
+		return 0;
+	}
+
+	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
+		map->write_to_disk = TRUE;
+
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.index = index;
 	ctx.hdr = *index->hdr;
@@ -204,24 +206,25 @@
 			appends = rec.appends;
 			break;
 		case MAIL_INDEX_SYNC_TYPE_EXPUNGE:
-			if (src_idx != 0) {
+			if (src_idx == 0) {
+				/* expunges have to be atomic. so we'll have
+				   to copy the mapping, do the changes there
+				   and then finally replace the whole index
+				   file. to avoid extra disk I/O we copy the
+				   index into memory rather than to temporary
+				   file */
+				map = mail_index_map_to_memory(map);
+				mail_index_unmap(index, index->map);
+				index->map = map;
+				map->write_to_disk = TRUE;
+
+				dest_idx = rec.seq1-1;
+			} else {
 				count = (rec.seq1-1) - src_idx;
 				memmove(map->records + dest_idx,
 					map->records + src_idx,
 					count * sizeof(*map->records));
 				dest_idx += count;
-			} else {
-				dest_idx = rec.seq1-1;
-				if (mail_index_lock_exclusive_copy(index) <= 0)
-					return -1;
-				map = index->map;
-				if (mprotect(map->mmap_base, map->mmap_size,
-					     PROT_READ|PROT_WRITE) < 0) {
-					mail_index_set_syscall_error(index,
-						"mprotect()");
-					return -1;
-				}
-				locked = TRUE;
 			}
 
 			mail_index_sync_update_expunges(&ctx, rec.seq1,
@@ -242,7 +245,7 @@
 		dest_idx += count;
 
 		map->records_count = dest_idx;
-		map->mmap_used_size = index->hdr->header_size +
+		map->file_used_size = index->hdr->header_size +
 			map->records_count * sizeof(struct mail_index_record);
 	}
 
@@ -256,18 +259,11 @@
 	ctx.hdr.log_file_seq = file_seq;
 	ctx.hdr.log_file_offset = file_offset;
 
-	if (index->use_mmap) {
+	if (!MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
 		memcpy(map->mmap_base, &ctx.hdr, sizeof(ctx.hdr));
-		if (msync(map->mmap_base, map->mmap_used_size, MS_SYNC) < 0)
+		if (msync(map->mmap_base, map->file_used_size, MS_SYNC) < 0)
 			return mail_index_set_syscall_error(index, "msync()");
-	} else {
-		// FIXME
 	}
 
-	if (mprotect(map->mmap_base, map->mmap_size, PROT_READ) < 0)
-		mail_index_set_syscall_error(index, "mprotect()");
-
-	if (locked)
-		mail_index_unlock(index, 0);
 	return ret;
 }

Index: mail-index-sync.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-sync.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-sync.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-sync.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -365,6 +365,13 @@
 	return 0;
 }
 
+int mail_index_sync_have_more(struct mail_index_sync_ctx *ctx)
+{
+	return (ctx->update_idx != ctx->updates_count) ||
+		(ctx->expunge_idx != ctx->expunges_count) ||
+		ctx->sync_appends;
+}
+
 int mail_index_sync_end(struct mail_index_sync_ctx *ctx)
 {
 	uint32_t seq;

Index: mail-index-transaction-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-transaction-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-transaction-private.h	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-transaction-private.h	28 Apr 2004 00:21:00 -0000	1.2
@@ -5,7 +5,7 @@
 	struct mail_index_view *view;
 
         buffer_t *appends;
-	uint32_t first_new_seq, last_new_seq, next_uid;
+	uint32_t first_new_seq, last_new_seq;
 
 	buffer_t *expunges;
 

Index: mail-index-transaction.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-transaction.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-transaction.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-transaction.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -19,7 +19,6 @@
 	t = i_new(struct mail_index_transaction, 1);
 	t->view = view;
 	t->hide_transaction = hide;
-	t->next_uid = view->index->hdr->next_uid;
 	return t;
 }
 
@@ -127,8 +126,6 @@
 {
         struct mail_index_record *rec;
 
-	i_assert(uid >= t->next_uid);
-
 	if (t->appends == NULL) {
 		t->appends = buffer_create_dynamic(default_pool,
 						   4096, (size_t)-1);
@@ -146,8 +143,6 @@
 	rec = buffer_append_space_unsafe(t->appends, sizeof(*rec));
 	memset(rec, 0, sizeof(*rec));
 	rec->uid = uid;
-
-	t->next_uid = uid+1;
 }
 
 void mail_index_expunge(struct mail_index_transaction *t, uint32_t seq)

Index: mail-index-view-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view-private.h	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-view-private.h	28 Apr 2004 00:21:00 -0000	1.2
@@ -22,6 +22,7 @@
 };
 
 int mail_index_view_lock(struct mail_index_view *view, int update_index);
+int mail_index_view_lock_head(struct mail_index_view *view, int update_index);
 void mail_index_view_add_synced_transaction(struct mail_index_view *view,
 					    uint32_t log_file_seq,
 					    uoff_t log_file_offset);

Index: mail-index-view-sync.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view-sync.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view-sync.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-view-sync.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -69,7 +69,7 @@
 	i_assert(view->transactions == 0);
 	i_assert(!view->syncing);
 
-	if (mail_index_view_lock(view, TRUE) < 0)
+	if (mail_index_view_lock_head(view, TRUE) < 0)
 		return -1;
 
 	hdr = view->index->hdr;

Index: mail-index-view.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-index-view.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -17,8 +17,8 @@
 	view->map = index->map;
 	view->map->refcount++;
 
-	view->log_file_seq = view->index->hdr->log_file_seq;
-	view->log_file_offset = view->index->hdr->log_file_offset;
+	view->log_file_seq = view->map->log_file_seq;
+	view->log_file_offset = view->map->log_file_offset;
 	return view;
 }
 
@@ -33,16 +33,9 @@
 	i_free(view);
 }
 
-static int
-mail_index_view_lock_head(struct mail_index_view *view, int update_index)
+int mail_index_view_lock_head(struct mail_index_view *view, int update_index)
 {
 	if (!mail_index_is_locked(view->index, view->lock_id)) {
-		if (view->index->indexid != view->map->hdr->indexid) {
-			/* index was rebuilt */
-			view->inconsistent = TRUE;
-			return -1;
-		}
-
 		if (mail_index_lock_shared(view->index, update_index,
 					   &view->lock_id) < 0)
 			return -1;
@@ -51,6 +44,12 @@
 			view->inconsistent = TRUE;
 			return -1;
 		}
+
+		if (view->index->indexid != view->map->hdr->indexid) {
+			/* index was rebuilt */
+			view->inconsistent = TRUE;
+			return -1;
+		}
 	}
 
 	return 0;
@@ -104,10 +103,14 @@
 	view->transactions--;
 }
 
-const struct mail_index_header *
-mail_index_get_header(struct mail_index_view *view)
+int mail_index_get_header(struct mail_index_view *view,
+			  const struct mail_index_header **hdr_r)
 {
-	return view->map->hdr;
+	if (mail_index_view_lock(view, FALSE) < 0)
+		return -1;
+
+	*hdr_r = view->map->hdr;
+	return 0;
 }
 
 int mail_index_lookup(struct mail_index_view *view, uint32_t seq,
@@ -226,6 +229,9 @@
 		return 0;
 	}
 
+	if (last_uid >= view->map->hdr->next_uid)
+		last_uid = view->map->hdr->next_uid-1;
+
 	/* optimization - binary lookup only from right side: */
 	*last_seq_r = mail_index_bsearch_uid(view, last_uid, &left_idx, -1);
 	i_assert(*last_seq_r >= *first_seq_r);

Index: mail-index.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index.c,v
retrieving revision 1.106
retrieving revision 1.107
diff -u -d -r1.106 -r1.107
--- mail-index.c	27 Apr 2004 20:25:53 -0000	1.106
+++ mail-index.c	28 Apr 2004 00:21:00 -0000	1.107
@@ -4,6 +4,7 @@
 #include "buffer.h"
 #include "file-lock.h"
 #include "mmap-util.h"
+#include "read-full.h"
 #include "write-full.h"
 #include "mail-index-private.h"
 #include "mail-transaction-log.h"
@@ -11,6 +12,9 @@
 #include <stdio.h>
 #include <stddef.h>
 #include <time.h>
+#include <sys/stat.h>
+
+static int mail_index_try_open_only(struct mail_index *index);
 
 struct mail_index *mail_index_alloc(const char *dir, const char *prefix)
 {
@@ -35,23 +39,22 @@
 }
 
 static int mail_index_check_quick_header(struct mail_index *index,
-					 struct mail_index_map *map,
-					 const struct mail_index_header *hdr)
+					 struct mail_index_map *map)
 {
-	if ((hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
+	if ((map->hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
 		/* either a crash or we've already complained about it */
 		return -1;
 	}
 
-	if (map->mmap_used_size > map->mmap_size) {
+	if (map->file_used_size > map->file_size) {
 		map->records_count =
-			(map->mmap_size - hdr->header_size) /
+			(map->file_size - map->hdr->header_size) /
 			sizeof(struct mail_index_record);
-		map->mmap_used_size = map->mmap_size;
+		map->file_used_size = map->file_size;
 
 		mail_index_set_error(index, "Corrupted index file %s: "
 				     "messages_count too large (%u > %u)",
-				     index->filepath, hdr->messages_count,
+				     index->filepath, map->hdr->messages_count,
 				     map->records_count);
 		return 0;
 	}
@@ -60,9 +63,9 @@
 }
 
 static int mail_index_check_header(struct mail_index *index,
-				   struct mail_index_map *map,
-				   const struct mail_index_header *hdr)
+				   struct mail_index_map *map)
 {
+	const struct mail_index_header *hdr = map->hdr;
 	unsigned char compat_data[3];
 	int ret;
 
@@ -83,7 +86,7 @@
 		return -1;
 	}
 
-	if ((ret = mail_index_check_quick_header(index, map, hdr)) <= 0)
+	if ((ret = mail_index_check_quick_header(index, map)) <= 0)
 		return ret;
 
 	/* following some extra checks that only take a bit of CPU */
@@ -107,47 +110,144 @@
 	return 1;
 }
 
-void mail_index_unmap(struct mail_index *index, struct mail_index_map *map)
+static void mail_index_map_clear(struct mail_index *index,
+				 struct mail_index_map *map)
 {
-	if (--map->refcount > 0)
-		return;
-
 	if (map->buffer != NULL) {
 		i_assert(map->mmap_base == NULL);
 		buffer_free(map->buffer);
-	} else {
+		map->buffer = NULL;
+	} else if (map->mmap_base != NULL) {
 		i_assert(map->buffer == NULL);
-		if (munmap(map->mmap_base, map->mmap_size) < 0)
+		if (munmap(map->mmap_base, map->file_size) < 0)
 			mail_index_set_syscall_error(index, "munmap()");
+		map->mmap_base = NULL;
+	}
+
+	if (map->refcount > 0) {
+		map->file_size = 0;
+		map->file_used_size = 0;
+		map->hdr = NULL;
+		map->records = NULL;
+		map->records_count = 0;
 	}
+}
+
+void mail_index_unmap(struct mail_index *index, struct mail_index_map *map)
+{
+	if (--map->refcount > 0)
+		return;
+
+	i_assert(map->refcount == 0);
+	mail_index_map_clear(index, map);
 	i_free(map);
 }
 
-int mail_index_map(struct mail_index *index, int force)
+static void mail_index_unmap_forced(struct mail_index *index,
+				    struct mail_index_map *map)
 {
-	const struct mail_index_header *hdr;
-	struct mail_index_map *map;
-	size_t used_size;
+	mail_index_map_clear(index, map);
+	mail_index_unmap(index, map);
+}
+
+static int mail_index_read_map(struct mail_index *index,
+			       struct mail_index_map *map)
+{
+	struct stat st;
+	void *data;
+	size_t file_size;
 	int ret;
 
-	if (!index->use_mmap) {
-		// FIXME
+	if (fstat(index->fd, &st) < 0) {
+		if (errno == ESTALE)
+			return 0;
+		mail_index_set_syscall_error(index, "fstat()");
 		return -1;
 	}
+	file_size = st.st_size;
+
+	if (map->buffer == NULL) {
+		map->buffer = buffer_create_dynamic(default_pool,
+						    file_size, (size_t)-1);
+	}
+
+	/* @UNSAFE */
+	buffer_set_used_size(map->buffer, 0);
+	data = buffer_append_space_unsafe(map->buffer, file_size);
+	ret = pread_full(index->fd, data, file_size, 0);
+	if (ret < 0) {
+		if (errno == ESTALE)
+			return 0;
+		mail_index_set_syscall_error(index, "pread_full()");
+		return -1;
+	}
+	if (ret == 0) {
+		mail_index_set_error(index,
+			"Unexpected EOF while reading index file");
+		return -1;
+	}
+
+	map->file_size = file_size;
+	return 1;
+}
+
+static int mail_index_read_map_with_retry(struct mail_index *index,
+					  struct mail_index_map *map)
+{
+	int i, ret;
+
+	for (i = 0; i < INDEX_ESTALE_RETRY_COUNT; i++) {
+		ret = mail_index_read_map(index, map);
+		if (ret != 0)
+			return ret;
+
+		/* ESTALE - reopen index file */
+		if (close(index->fd) < 0)
+			mail_index_set_syscall_error(index, "close()");
+		index->fd = -1;
+
+		ret = mail_index_try_open_only(index);
+		if (ret <= 0) {
+			if (ret == 0) {
+				/* the file was lost */
+				errno = ENOENT;
+				mail_index_set_syscall_error(index, "open()");
+			}
+			return -1;
+		}
+	}
+
+	/* Too many ESTALE retries */
+	mail_index_set_syscall_error(index, "read_map()");
+	return -1;
+}
+
+int mail_index_map(struct mail_index *index, int force)
+{
+	struct mail_index_map *map;
+	size_t used_size;
+	void *base;
+	int ret;
 
 	if (index->map != NULL) {
 		map = index->map;
 
 		/* see if re-mmaping is needed (file has grown) */
-		hdr = map->mmap_base;
-                used_size = hdr->header_size +
-			hdr->messages_count * sizeof(struct mail_index_record);
-		if (map->mmap_size >= used_size && !force)
+                used_size = map->hdr->header_size +
+			map->hdr->messages_count *
+			sizeof(struct mail_index_record);
+		if (map->file_size >= used_size && !force) {
+			/* update log file position in case it has changed */
+			map->log_file_seq = map->hdr->log_file_seq;
+			map->log_file_offset = map->hdr->log_file_offset;
 			return 1;
+		}
 
-		if (munmap(map->mmap_base, map->mmap_size) < 0)
-			mail_index_set_syscall_error(index, "munmap()");
-		map->mmap_base = NULL;
+		if (map->mmap_base != NULL) {
+			if (munmap(map->mmap_base, map->file_size) < 0)
+				mail_index_set_syscall_error(index, "munmap()");
+			map->mmap_base = NULL;
+		}
 	} else {
 		map = i_new(struct mail_index_map, 1);
 		map->refcount = 1;
@@ -156,44 +256,63 @@
 	index->hdr = NULL;
 	index->map = NULL;
 
-	map->mmap_base = mmap_ro_file(index->fd, &map->mmap_size);
-	if (map->mmap_base == MAP_FAILED) {
-		map->mmap_base = NULL;
-		mail_index_set_syscall_error(index, "mmap()");
-		mail_index_unmap(index, map);
-		return -1;
+	/* make sure if we fail we don't try to access anything outside the
+	   buffer */
+	map->file_size = 0;
+	map->file_used_size = 0;
+
+	if (!index->mmap_disable) {
+		map->mmap_base = index->lock_type != F_WRLCK ?
+			mmap_ro_file(index->fd, &map->file_size) :
+			mmap_rw_file(index->fd, &map->file_size);
+		if (map->mmap_base == MAP_FAILED) {
+			map->mmap_base = NULL;
+			mail_index_set_syscall_error(index, "mmap()");
+			mail_index_unmap_forced(index, map);
+			return -1;
+		}
+	} else {
+		if (mail_index_read_map_with_retry(index, map) < 0) {
+			mail_index_unmap_forced(index, map);
+			return -1;
+		}
 	}
 
-	if (map->mmap_size < MAIL_INDEX_HEADER_MIN_SIZE) {
+	if (map->file_size < MAIL_INDEX_HEADER_MIN_SIZE) {
 		mail_index_set_error(index, "Corrupted index file %s: "
 				     "File too small (%"PRIuSIZE_T")",
-				     index->filepath, map->mmap_size);
-		mail_index_unmap(index, map);
+				     index->filepath, map->file_size);
+		mail_index_unmap_forced(index, map);
 		return 0;
 	}
 
-	hdr = map->mmap_base;
-	if (hdr->header_size < sizeof(*hdr)) {
+	base = !MAIL_INDEX_MAP_IS_IN_MEMORY(map) ? map->mmap_base :
+		buffer_get_modifyable_data(map->buffer, NULL);
+	map->hdr = base;
+
+	if (map->hdr->header_size < sizeof(*map->hdr)) {
 		/* header smaller than ours, make a copy so our newer headers
 		   won't have garbage in them */
-		memcpy(&map->hdr_copy, hdr, hdr->header_size);
-		hdr = &map->hdr_copy;
+		memcpy(&map->hdr_copy, map->hdr, map->hdr->header_size);
+		map->hdr = &map->hdr_copy;
 	}
 
-	map->hdr = map->mmap_base;
-	map->records = PTR_OFFSET(map->mmap_base, hdr->header_size);
-	map->records_count = hdr->messages_count;
-	map->mmap_used_size = hdr->header_size +
+	map->records = PTR_OFFSET(base, map->hdr->header_size);
+	map->records_count = map->hdr->messages_count;
+	map->file_used_size = map->hdr->header_size +
 		map->records_count * sizeof(struct mail_index_record);
 
-	ret = mail_index_check_header(index, map, hdr);
+	ret = mail_index_check_header(index, map);
 	if (ret < 0) {
-		mail_index_unmap(index, map);
+		mail_index_unmap_forced(index, map);
 		return 0;
 	}
 	if (ret == 0)
 		index->fsck = TRUE;
 
+	map->log_file_seq = map->hdr->log_file_seq;
+	map->log_file_offset = map->hdr->log_file_offset;
+
 	index->hdr = map->mmap_base;
 	index->map = map;
 	return 1;
@@ -252,22 +371,10 @@
 int mail_index_write_header(struct mail_index *index,
 			    const struct mail_index_header *hdr)
 {
-	if (index->use_mmap) {
-		if (mprotect(index->map->mmap_base, sizeof(*hdr),
-			     PROT_READ | PROT_WRITE) < 0) {
-			mail_index_set_syscall_error(index, "mprotect()");
-			return -1;
-		}
-
+	if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
 		memcpy(index->map->mmap_base, hdr, sizeof(*hdr));
 		if (msync(index->map->mmap_base, sizeof(*hdr), MS_SYNC) < 0)
 			return mail_index_set_syscall_error(index, "msync()");
-
-		if (mprotect(index->map->mmap_base, sizeof(*hdr),
-			     PROT_READ) < 0) {
-			mail_index_set_syscall_error(index, "mprotect()");
-			return -1;
-		}
 	} else {
 		if (pwrite_full(index->fd, hdr, sizeof(*hdr), 0) < 0) {
 			mail_index_set_syscall_error(index, "pwrite_full()");
@@ -314,7 +421,7 @@
 	hdr->log_file_seq = seq;
 	hdr->log_file_offset = offset;
 
-	ret = mail_index_try_open(index);
+	ret = mail_index_try_open(index, NULL);
 	if (ret != 0) {
 		mail_transaction_log_sync_unlock(index->log);
 		return ret;
@@ -358,15 +465,24 @@
 	return 1;
 }
 
-int mail_index_try_open(struct mail_index *index)
+static int mail_index_try_open_only(struct mail_index *index)
 {
-	unsigned int lock_id;
-	int ret;
+	int i;
 
-	index->fd = open(index->filepath, O_RDWR);
-	if (index->fd == -1 && errno == EACCES) {
-		index->fd = open(index->filepath, O_RDONLY);
-		index->readonly = TRUE;
+	for (i = 0; i < 3; i++) {
+		index->fd = open(index->filepath, O_RDWR);
+		if (index->fd == -1 && errno == EACCES) {
+			index->fd = open(index->filepath, O_RDONLY);
+			index->readonly = TRUE;
+		}
+		if (index->fd != -1 || errno != ESTALE)
+			break;
+
+		/* May happen with some OSes with NFS. Try again, although
+		   there's still a race condition with another computer
+		   creating the index file again. However, we can't try forever
+		   as ESTALE happens also if index directory has been deleted
+		   from server.. */
 	}
 	if (index->fd == -1) {
 		if (errno != ENOENT)
@@ -374,29 +490,51 @@
 
 		/* have to create it */
 		return 0;
-	} else {
-		if (mail_index_lock_shared(index, FALSE, &lock_id) < 0)
-			return -1;
-		ret = mail_index_map(index, FALSE);
-		mail_index_unlock(index, lock_id);
+	}
+	return 1;
+}
 
-		if (ret == 0) {
-			/* it's corrupted - recreate it */
-			(void)close(index->fd);
-			index->fd = -1;
-		}
+int mail_index_try_open(struct mail_index *index, unsigned int *lock_id_r)
+{
+	unsigned int lock_id;
+	int ret;
+
+	if (lock_id_r != NULL)
+		*lock_id_r = 0;
+
+	ret = mail_index_try_open_only(index);
+	if (ret <= 0)
 		return ret;
+
+	if (mail_index_lock_shared(index, FALSE, &lock_id) < 0)
+		return -1;
+	ret = mail_index_map(index, FALSE);
+	if (ret == 0) {
+		/* it's corrupted - recreate it */
+		mail_index_unlock(index, lock_id);
+		if (lock_id_r != NULL)
+			*lock_id_r = 0;
+
+		(void)close(index->fd);
+		index->fd = -1;
+	} else {
+		if (lock_id_r != NULL)
+			*lock_id_r = lock_id;
+		else
+			mail_index_unlock(index, lock_id);
 	}
+	return ret;
 }
 
 static int
 mail_index_open2(struct mail_index *index, enum mail_index_open_flags flags)
 {
-        struct mail_index_header hdr;
+	struct mail_index_header hdr;
+	unsigned int lock_id = 0;
 	int ret;
 
-	ret = mail_index_try_open(index);
-	if (ret == 1)
+	ret = mail_index_try_open(index, &lock_id);
+	if (ret > 0)
 		hdr = *index->hdr;
 	else if (ret == 0) {
 		/* doesn't exist, or corrupted */
@@ -412,6 +550,9 @@
 	index->log = mail_transaction_log_open_or_create(index);
 	if (index->log == NULL)
 		return -1;
+
+	if (lock_id != 0)
+		mail_index_unlock(index, lock_id);
 	return index->fd != -1 ? 1 : mail_index_create(index, &hdr);
 }
 
@@ -430,7 +571,10 @@
 		index->nodiskspace = FALSE;
 		index->index_lock_timeout = FALSE;
 		index->log_locked = FALSE;
-		index->use_mmap = (flags & MAIL_INDEX_OPEN_FLAG_NO_MMAP) == 0;
+		index->mmap_disable =
+			(flags & MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE) != 0;
+		index->mmap_no_write =
+			(flags & MAIL_INDEX_OPEN_FLAG_MMAP_NO_WRITE) != 0;
 		index->readonly = FALSE;
 
 		index->filepath = i_strconcat(index->dir, "/",
@@ -508,6 +652,11 @@
 	return -1;
 }
 
+void mail_index_set_inconsistent(struct mail_index *index)
+{
+	index->indexid = 0;
+}
+
 int mail_index_mark_corrupted(struct mail_index *index)
 {
 	struct mail_index_header hdr;
@@ -516,6 +665,15 @@
 	    (index->hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0)
 		return 0;
 
+	/* make sure we can write the header */
+	if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
+		if (mprotect(index->map->mmap_base, sizeof(hdr),
+			     PROT_READ | PROT_WRITE) < 0) {
+			mail_index_set_syscall_error(index, "mprotect()");
+			return -1;
+		}
+	}
+
 	hdr = *index->hdr;
 	hdr.flags |= MAIL_INDEX_HDR_FLAG_CORRUPTED;
 	if (mail_index_write_header(index, &hdr) < 0)
@@ -523,6 +681,8 @@
 
 	if (fsync(index->fd) < 0)
 		return mail_index_set_syscall_error(index, "fsync()");
+
+	mail_index_set_inconsistent(index);
 	return 0;
 }
 

Index: mail-index.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index.h,v
retrieving revision 1.98
retrieving revision 1.99
diff -u -d -r1.98 -r1.99
--- mail-index.h	27 Apr 2004 20:25:53 -0000	1.98
+++ mail-index.h	28 Apr 2004 00:21:00 -0000	1.99
@@ -19,9 +19,13 @@
 	   delay opening cache/log files unless they're needed. */
 	MAIL_INDEX_OPEN_FLAG_FAST		= 0x02,
 	/* Don't try to mmap() index files */
-	MAIL_INDEX_OPEN_FLAG_NO_MMAP		= 0x04,
+	MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE	= 0x04,
+	/* Don't try to write() to mmap()ed index files. Required for the few
+	   OSes that don't have unified buffer cache
+	   (currently OpenBSD <= 3.5) */
+	MAIL_INDEX_OPEN_FLAG_MMAP_NO_WRITE	= 0x08,
 	/* Use only dotlocking, no fcntl() */
-	MAIL_INDEX_OPEN_FLAG_USE_DOTLOCKS	= 0x08
+	MAIL_INDEX_OPEN_FLAG_USE_DOTLOCKS	= 0x10
 };
 
 enum mail_index_header_compat_flags {
@@ -203,6 +207,8 @@
 /* Returns -1 if error, 0 if sync is finished, 1 if record was filled. */
 int mail_index_sync_next(struct mail_index_sync_ctx *ctx,
 			 struct mail_index_sync_rec *sync_rec);
+/* Returns 1 if there's more to sync, 0 if not. */
+int mail_index_sync_have_more(struct mail_index_sync_ctx *ctx);
 /* End synchronization by unlocking the index and closing the view. */
 int mail_index_sync_end(struct mail_index_sync_ctx *ctx);
 
@@ -228,9 +234,8 @@
 void mail_index_view_sync_end(struct mail_index_view_sync_ctx *ctx);
 
 /* Returns the index header. */
-const struct mail_index_header *
-mail_index_get_header(struct mail_index_view *view);
-
+int mail_index_get_header(struct mail_index_view *view,
+			  const struct mail_index_header **hdr_r);
 /* Returns the given message. */
 int mail_index_lookup(struct mail_index_view *view, uint32_t seq,
 		      const struct mail_index_record **rec_r);

Index: mail-transaction-log.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-transaction-log.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-transaction-log.c	27 Apr 2004 20:25:53 -0000	1.1
+++ mail-transaction-log.c	28 Apr 2004 00:21:00 -0000	1.2
@@ -286,13 +286,13 @@
 			if (mail_index_write_header(index, &idx_hdr) < 0)
 				ret = -1;
 		}
+		hdr.file_seq = index->hdr->log_file_seq;
 		mail_index_unlock(index, lock_id);
 
 		if (ret <= 0) {
 			(void)file_dotlock_delete(path, fd);
 			return -1;
 		}
-		hdr.file_seq = index->hdr->log_file_seq;
 	} else {
 		/* creating new index file */
 		hdr.file_seq = index->hdr->log_file_seq+1;
@@ -555,9 +555,10 @@
 int mail_transaction_log_file_map(struct mail_transaction_log_file *file,
 				  uoff_t start_offset, uoff_t end_offset)
 {
+	struct mail_index *index = file->log->index;
 	size_t size;
 	struct stat st;
-	int ret;
+	int ret, use_mmap;
 
 	i_assert(start_offset <= end_offset);
 
@@ -566,6 +567,11 @@
 		return 0;
 	}
 
+	/* with mmap_no_write we could alternatively just write to log with
+	   msync() rather than pwrite(). that'd cause slightly more disk I/O,
+	   so rather use more memory. */
+	use_mmap = !index->mmap_disable && !index->mmap_no_write;
+
 	if (file->buffer != NULL && file->buffer_offset <= start_offset) {
 		/* see if we already have it */
 		size = buffer_get_used_size(file->buffer);
@@ -574,8 +580,8 @@
 	}
 
 	if (fstat(file->fd, &st) < 0) {
-		mail_index_file_set_syscall_error(file->log->index,
-						  file->filepath, "fstat()");
+		mail_index_file_set_syscall_error(index, file->filepath,
+						  "fstat()");
 		return -1;
 	}
 
@@ -587,14 +593,13 @@
 	}
 
 	if (file->buffer != NULL &&
-	    (file->mmap_base != NULL || file->log->index->use_mmap)) {
+	    (file->mmap_base != NULL || use_mmap)) {
 		buffer_free(file->buffer);
 		file->buffer = NULL;
 	}
 	if (file->mmap_base != NULL) {
 		if (munmap(file->mmap_base, file->mmap_size) < 0) {
-			mail_index_file_set_syscall_error(file->log->index,
-							  file->filepath,
+			mail_index_file_set_syscall_error(index, file->filepath,
 							  "munmap()");
 		}
 		file->mmap_base = NULL;
@@ -619,7 +624,7 @@
 		return -1;
 	}
 
-	if (!file->log->index->use_mmap) {
+	if (!use_mmap) {
 		ret = mail_transaction_log_file_read(file, start_offset);
 		if (ret <= 0) {
 			/* make sure we don't leave ourself in
@@ -640,8 +645,8 @@
 			       MAP_SHARED, file->fd, 0);
 	if (file->mmap_base == MAP_FAILED) {
 		file->mmap_base = NULL;
-		mail_index_file_set_syscall_error(file->log->index,
-						  file->filepath, "mmap()");
+		mail_index_file_set_syscall_error(index, file->filepath,
+						  "mmap()");
 		return -1;
 	}
 	file->buffer = buffer_create_const_data(default_pool, file->mmap_base,



More information about the dovecot-cvs mailing list