[dovecot-cvs] dovecot/src/lib-index mail-index-lock.c, 1.1,
1.2 mail-index-private.h, 1.1, 1.2 mail-index-reset.c, 1.1,
1.2 mail-index-sync-update.c, 1.1, 1.2 mail-index-sync.c, 1.1,
1.2 mail-index-transaction-private.h, 1.1,
1.2 mail-index-transaction.c, 1.1,
1.2 mail-index-view-private.h, 1.1, 1.2 mail-index-view-sync.c,
1.1, 1.2 mail-index-view.c, 1.1, 1.2 mail-index.c, 1.106,
1.107 mail-index.h, 1.98, 1.99 mail-transaction-log.c, 1.1, 1.2
cras at procontrol.fi
cras at procontrol.fi
Wed Apr 28 03:21:03 EEST 2004
Update of /home/cvs/dovecot/src/lib-index
In directory talvi:/tmp/cvs-serv32334/lib-index
Modified Files:
mail-index-lock.c mail-index-private.h mail-index-reset.c
mail-index-sync-update.c mail-index-sync.c
mail-index-transaction-private.h mail-index-transaction.c
mail-index-view-private.h mail-index-view-sync.c
mail-index-view.c mail-index.c mail-index.h
mail-transaction-log.c
Log Message:
Enforce correct locking by mprotect()ing the index file. Add support for
disabling mmap for indexes entirely, and for disabling just mmap+write().
Index: mail-index-lock.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-lock.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-lock.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-lock.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -28,6 +28,7 @@
*/
#include "lib.h"
+#include "mmap-util.h"
#include "file-lock.h"
#include "write-full.h"
#include "mail-index-private.h"
@@ -46,7 +47,7 @@
mail_index_set_syscall_error(index, "close()");
index->fd = fd;
- ret = fd < 0 ? mail_index_try_open(index) :
+ ret = fd < 0 ? mail_index_try_open(index, NULL) :
mail_index_map(index, FALSE);
if (ret <= 0) {
// FIXME: serious problem, we'll just crash later..
@@ -79,7 +80,6 @@
unsigned int timeout_secs, int update_index,
unsigned int *lock_id_r)
{
- // FIXME: mprotect() the index to make sure we don't access it unlocked!
int ret;
i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
@@ -147,6 +147,17 @@
index->excl_lock_count++;
*lock_id_r = index->lock_id + 1;
}
+
+ if (index->map != NULL &&
+ !MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
+ int prot = PROT_READ | (lock_type == F_WRLCK ? PROT_WRITE : 0);
+ if (mprotect(index->map->mmap_base,
+ index->map->file_size, prot) < 0) {
+ mail_index_set_syscall_error(index, "mprotect()");
+ return -1;
+ }
+ }
+
return 1;
}
@@ -178,9 +189,10 @@
if (fd == -1)
return -1;
- ret = 0;
- if (write_full(fd, index->map->mmap_base,
- index->map->mmap_used_size) < 0) {
+ ret = write_full(fd, index->map->hdr, sizeof(*index->map->hdr));
+ if (ret < 0 || write_full(fd, index->map->records,
+ index->map->records_count *
+ sizeof(struct mail_index_record)) < 0) {
mail_index_file_set_syscall_error(index, path, "write_full()");
(void)close(fd);
(void)unlink(path);
@@ -209,6 +221,43 @@
return 1;
}
+static int mail_index_lock_exclusive_copy(struct mail_index *index)
+{
+ int fd;
+
+ i_assert(index->log_locked);
+
+ if (index->copy_lock_path != NULL) {
+ index->excl_lock_count++;
+ return 1;
+ }
+
+ /* copy the index to index.tmp and use it. when */
+ fd = mail_index_copy(index);
+ if (fd == -1)
+ return -1;
+
+ index->lock_type = F_WRLCK;
+ index->excl_lock_count++;
+
+ if (mail_index_reopen(index, fd) < 0) {
+ i_assert(index->excl_lock_count == 1);
+ i_free(index->copy_lock_path);
+ index->copy_lock_path = NULL;
+
+ /* go back to old index */
+ (void)mail_index_reopen(index, -1);
+
+ index->lock_type = F_UNLCK;
+ index->excl_lock_count = 0;
+ index->shared_lock_count = 0;
+ return -1;
+ }
+
+ i_assert(index->excl_lock_count == 1);
+ return 1;
+}
+
int mail_index_lock_exclusive(struct mail_index *index,
uint32_t log_file_seq, uoff_t log_file_offset,
unsigned int *lock_id_r)
@@ -253,43 +302,18 @@
return mail_index_lock_exclusive_copy(index);
}
-int mail_index_lock_exclusive_copy(struct mail_index *index)
-{
- int fd;
-
- if (index->copy_lock_path != NULL) {
- index->excl_lock_count++;
- return 1;
- }
-
- /* copy the index to index.tmp and use it. when */
- fd = mail_index_copy(index);
- if (fd == -1)
- return -1;
-
- if (mail_index_reopen(index, fd) < 0) {
- (void)mail_index_reopen(index, -1);
- i_free(index->copy_lock_path);
- index->copy_lock_path = NULL;
- return -1;
- }
-
- index->lock_type = F_WRLCK;
- index->excl_lock_count++;
- return 1;
-}
-
-static void mail_index_copy_lock_finish(struct mail_index *index)
+static int mail_index_copy_lock_finish(struct mail_index *index)
{
if (fsync(index->fd) < 0) {
mail_index_file_set_syscall_error(index, index->copy_lock_path,
"fsync()");
+ return -1;
}
if (rename(index->copy_lock_path, index->filepath) < 0) {
mail_index_set_error(index, "rename(%s, %s) failed: %m",
index->copy_lock_path, index->filepath);
- // FIXME: this isn't good
+ return -1;
}
i_free(index->copy_lock_path);
@@ -298,15 +322,33 @@
index->shared_lock_count = 0;
index->lock_id += 2;
index->lock_type = F_UNLCK;
+ return 0;
}
void mail_index_unlock(struct mail_index *index, unsigned int lock_id)
{
- if (index->copy_lock_path != NULL) {
+ if (index->copy_lock_path != NULL ||
+ (index->map != NULL && index->map->write_to_disk)) {
i_assert(index->log_locked);
i_assert(index->excl_lock_count > 0);
- if (--index->excl_lock_count == 0)
- mail_index_copy_lock_finish(index);
+ i_assert(lock_id == index->lock_id+1);
+
+ if (--index->excl_lock_count == 0) {
+ if (index->map != NULL && index->map->write_to_disk) {
+ if (index->copy_lock_path != NULL) {
+ /* new mapping replaces the old */
+ (void)unlink(index->copy_lock_path);
+ i_free(index->copy_lock_path);
+ index->copy_lock_path = NULL;
+ }
+ if (mail_index_copy(index) < 0) {
+ mail_index_set_inconsistent(index);
+ return;
+ }
+ }
+ if (mail_index_copy_lock_finish(index) < 0)
+ mail_index_set_inconsistent(index);
+ }
return;
}
@@ -326,6 +368,12 @@
if (index->shared_lock_count == 0 && index->excl_lock_count == 0) {
index->lock_id += 2;
index->lock_type = F_UNLCK;
+ if (index->map != NULL) {
+ if (mprotect(index->map->mmap_base,
+ index->map->file_size, PROT_NONE) < 0)
+ mail_index_set_syscall_error(index,
+ "mprotect()");
+ }
if (file_wait_lock(index->fd, F_UNLCK) < 0)
mail_index_set_syscall_error(index, "file_wait_lock()");
}
Index: mail-index-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-private.h 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-private.h 28 Apr 2004 00:21:00 -0000 1.2
@@ -18,6 +18,10 @@
#define INDEX_COMPRESS_PERCENTAGE 50
/* Compress the file when searching deleted records tree has to go this deep */
#define INDEX_COMPRESS_DEPTH 10
+/* How many times to retry opening index files if read/fstat returns ESTALE.
+ This happens with NFS when the file has been deleted (ie. index file was
+ rewritten by another computer than us). */
+#define INDEX_ESTALE_RETRY_COUNT 10
enum mail_index_mail_flags {
MAIL_INDEX_MAIL_FLAG_DIRTY = 0x80,
@@ -36,11 +40,15 @@
unsigned int records_count;
void *mmap_base;
- size_t mmap_size, mmap_used_size;
+ size_t file_size, file_used_size;
buffer_t *buffer;
- struct mail_index_header hdr_copy;
+ uint32_t log_file_seq;
+ uoff_t log_file_offset;
+
+ struct mail_index_header hdr_copy;
+ unsigned int write_to_disk:1;
};
struct mail_index {
@@ -69,7 +77,8 @@
unsigned int opened:1;
unsigned int log_locked:1;
- unsigned int use_mmap:1;
+ unsigned int mmap_disable:1;
+ unsigned int mmap_no_write:1;
unsigned int readonly:1;
unsigned int fsck:1;
};
@@ -79,7 +88,7 @@
const struct mail_index_header *hdr);
int mail_index_create(struct mail_index *index, struct mail_index_header *hdr);
-int mail_index_try_open(struct mail_index *index);
+int mail_index_try_open(struct mail_index *index, unsigned int *lock_id_r);
int mail_index_create_tmp_file(struct mail_index *index, const char **path_r);
/* Returns 0 = ok, -1 = error. If update_index is TRUE, reopens the index
@@ -91,7 +100,6 @@
int mail_index_lock_exclusive(struct mail_index *index,
uint32_t log_file_seq, uoff_t log_file_offset,
unsigned int *lock_id_r);
-int mail_index_lock_exclusive_copy(struct mail_index *index);
void mail_index_unlock(struct mail_index *index, unsigned int lock_id);
/* Returns 1 if given lock_id is valid, 0 if not. */
int mail_index_is_locked(struct mail_index *index, unsigned int lock_id);
@@ -118,6 +126,7 @@
const struct mail_transaction_header *hdr,
const void *data, size_t *data_offset);
+void mail_index_set_inconsistent(struct mail_index *index);
int mail_index_mark_corrupted(struct mail_index *index);
int mail_index_set_error(struct mail_index *index, const char *fmt, ...)
Index: mail-index-reset.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-reset.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-reset.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-reset.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -10,9 +10,6 @@
{
struct mail_index_header hdr;
- /* this invalidates all views even if we fail later */
- index->indexid = 0;
-
if (mail_index_mark_corrupted(index) < 0)
return -1;
Index: mail-index-sync-update.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-sync-update.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-sync-update.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-sync-update.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -97,24 +97,31 @@
static int mail_index_grow(struct mail_index *index, unsigned int count)
{
- size_t size, mmap_used_size;
+ struct mail_index_map *map = index->map;
+ size_t size, file_used_size;
unsigned int records_count;
+ if (MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
+ (void)buffer_append_space_unsafe(map->buffer,
+ count * sizeof(struct mail_index_record));
+ return 0;
+ }
+
// FIXME: grow exponentially
- size = index->map->mmap_used_size +
+ size = map->file_used_size +
count * sizeof(struct mail_index_record);
if (file_set_size(index->fd, (off_t)size) < 0)
return mail_index_set_syscall_error(index, "file_set_size()");
- records_count = index->map->records_count;
- mmap_used_size = index->map->mmap_used_size;
+ records_count = map->records_count;
+ file_used_size = map->file_used_size;
if (mail_index_map(index, TRUE) <= 0)
return -1;
- i_assert(index->map->mmap_size >= size);
- index->map->records_count = records_count;
- index->map->mmap_used_size = mmap_used_size;
+ i_assert(map->file_size >= size);
+ map->records_count = records_count;
+ map->file_used_size = file_used_size;
return 0;
}
@@ -127,20 +134,10 @@
size_t space;
uint32_t next_uid;
- if (!ctx->index->use_mmap) {
- // FIXME
- }
-
- space = (map->mmap_size - map->mmap_used_size) / sizeof(*appends);
+ space = (map->file_size - map->file_used_size) / sizeof(*appends);
if (space < count) {
if (mail_index_grow(ctx->index, count) < 0)
return -1;
-
- if (mprotect(map->mmap_base, map->mmap_size,
- PROT_READ|PROT_WRITE) < 0) {
- mail_index_set_syscall_error(ctx->index, "mprotect()");
- return -1;
- }
}
next_uid = ctx->hdr.next_uid;
@@ -165,7 +162,7 @@
memcpy(map->records + map->records_count, appends,
count * sizeof(*appends));
map->records_count += count;
- map->mmap_used_size += count * sizeof(struct mail_index_record);
+ map->file_used_size += count * sizeof(struct mail_index_record);
return 0;
}
@@ -179,16 +176,21 @@
unsigned int append_count;
uint32_t count, file_seq, src_idx, dest_idx;
uoff_t file_offset;
- int ret, locked = FALSE;
-
- if (mprotect(map->mmap_base, map->mmap_size, PROT_READ|PROT_WRITE) < 0)
- return mail_index_set_syscall_error(index, "mprotect()");
+ int ret;
/* rewind */
sync_ctx->update_idx = sync_ctx->expunge_idx = 0;
sync_ctx->sync_appends =
buffer_get_used_size(sync_ctx->appends_buf) != 0;
+ if (!mail_index_sync_have_more(sync_ctx)) {
+ /* nothing to sync */
+ return 0;
+ }
+
+ if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
+ map->write_to_disk = TRUE;
+
memset(&ctx, 0, sizeof(ctx));
ctx.index = index;
ctx.hdr = *index->hdr;
@@ -204,24 +206,25 @@
appends = rec.appends;
break;
case MAIL_INDEX_SYNC_TYPE_EXPUNGE:
- if (src_idx != 0) {
+ if (src_idx == 0) {
+ /* expunges have to be atomic. so we'll have
+ to copy the mapping, do the changes there
+ and then finally replace the whole index
+ file. to avoid extra disk I/O we copy the
+ index into memory rather than to temporary
+ file */
+ map = mail_index_map_to_memory(map);
+ mail_index_unmap(index, index->map);
+ index->map = map;
+ map->write_to_disk = TRUE;
+
+ dest_idx = rec.seq1-1;
+ } else {
count = (rec.seq1-1) - src_idx;
memmove(map->records + dest_idx,
map->records + src_idx,
count * sizeof(*map->records));
dest_idx += count;
- } else {
- dest_idx = rec.seq1-1;
- if (mail_index_lock_exclusive_copy(index) <= 0)
- return -1;
- map = index->map;
- if (mprotect(map->mmap_base, map->mmap_size,
- PROT_READ|PROT_WRITE) < 0) {
- mail_index_set_syscall_error(index,
- "mprotect()");
- return -1;
- }
- locked = TRUE;
}
mail_index_sync_update_expunges(&ctx, rec.seq1,
@@ -242,7 +245,7 @@
dest_idx += count;
map->records_count = dest_idx;
- map->mmap_used_size = index->hdr->header_size +
+ map->file_used_size = index->hdr->header_size +
map->records_count * sizeof(struct mail_index_record);
}
@@ -256,18 +259,11 @@
ctx.hdr.log_file_seq = file_seq;
ctx.hdr.log_file_offset = file_offset;
- if (index->use_mmap) {
+ if (!MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
memcpy(map->mmap_base, &ctx.hdr, sizeof(ctx.hdr));
- if (msync(map->mmap_base, map->mmap_used_size, MS_SYNC) < 0)
+ if (msync(map->mmap_base, map->file_used_size, MS_SYNC) < 0)
return mail_index_set_syscall_error(index, "msync()");
- } else {
- // FIXME
}
- if (mprotect(map->mmap_base, map->mmap_size, PROT_READ) < 0)
- mail_index_set_syscall_error(index, "mprotect()");
-
- if (locked)
- mail_index_unlock(index, 0);
return ret;
}
Index: mail-index-sync.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-sync.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-sync.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-sync.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -365,6 +365,13 @@
return 0;
}
+int mail_index_sync_have_more(struct mail_index_sync_ctx *ctx)
+{
+ return (ctx->update_idx != ctx->updates_count) ||
+ (ctx->expunge_idx != ctx->expunges_count) ||
+ ctx->sync_appends;
+}
+
int mail_index_sync_end(struct mail_index_sync_ctx *ctx)
{
uint32_t seq;
Index: mail-index-transaction-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-transaction-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-transaction-private.h 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-transaction-private.h 28 Apr 2004 00:21:00 -0000 1.2
@@ -5,7 +5,7 @@
struct mail_index_view *view;
buffer_t *appends;
- uint32_t first_new_seq, last_new_seq, next_uid;
+ uint32_t first_new_seq, last_new_seq;
buffer_t *expunges;
Index: mail-index-transaction.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-transaction.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-transaction.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-transaction.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -19,7 +19,6 @@
t = i_new(struct mail_index_transaction, 1);
t->view = view;
t->hide_transaction = hide;
- t->next_uid = view->index->hdr->next_uid;
return t;
}
@@ -127,8 +126,6 @@
{
struct mail_index_record *rec;
- i_assert(uid >= t->next_uid);
-
if (t->appends == NULL) {
t->appends = buffer_create_dynamic(default_pool,
4096, (size_t)-1);
@@ -146,8 +143,6 @@
rec = buffer_append_space_unsafe(t->appends, sizeof(*rec));
memset(rec, 0, sizeof(*rec));
rec->uid = uid;
-
- t->next_uid = uid+1;
}
void mail_index_expunge(struct mail_index_transaction *t, uint32_t seq)
Index: mail-index-view-private.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view-private.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view-private.h 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-view-private.h 28 Apr 2004 00:21:00 -0000 1.2
@@ -22,6 +22,7 @@
};
int mail_index_view_lock(struct mail_index_view *view, int update_index);
+int mail_index_view_lock_head(struct mail_index_view *view, int update_index);
void mail_index_view_add_synced_transaction(struct mail_index_view *view,
uint32_t log_file_seq,
uoff_t log_file_offset);
Index: mail-index-view-sync.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view-sync.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view-sync.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-view-sync.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -69,7 +69,7 @@
i_assert(view->transactions == 0);
i_assert(!view->syncing);
- if (mail_index_view_lock(view, TRUE) < 0)
+ if (mail_index_view_lock_head(view, TRUE) < 0)
return -1;
hdr = view->index->hdr;
Index: mail-index-view.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index-view.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-index-view.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-index-view.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -17,8 +17,8 @@
view->map = index->map;
view->map->refcount++;
- view->log_file_seq = view->index->hdr->log_file_seq;
- view->log_file_offset = view->index->hdr->log_file_offset;
+ view->log_file_seq = view->map->log_file_seq;
+ view->log_file_offset = view->map->log_file_offset;
return view;
}
@@ -33,16 +33,9 @@
i_free(view);
}
-static int
-mail_index_view_lock_head(struct mail_index_view *view, int update_index)
+int mail_index_view_lock_head(struct mail_index_view *view, int update_index)
{
if (!mail_index_is_locked(view->index, view->lock_id)) {
- if (view->index->indexid != view->map->hdr->indexid) {
- /* index was rebuilt */
- view->inconsistent = TRUE;
- return -1;
- }
-
if (mail_index_lock_shared(view->index, update_index,
&view->lock_id) < 0)
return -1;
@@ -51,6 +44,12 @@
view->inconsistent = TRUE;
return -1;
}
+
+ if (view->index->indexid != view->map->hdr->indexid) {
+ /* index was rebuilt */
+ view->inconsistent = TRUE;
+ return -1;
+ }
}
return 0;
@@ -104,10 +103,14 @@
view->transactions--;
}
-const struct mail_index_header *
-mail_index_get_header(struct mail_index_view *view)
+int mail_index_get_header(struct mail_index_view *view,
+ const struct mail_index_header **hdr_r)
{
- return view->map->hdr;
+ if (mail_index_view_lock(view, FALSE) < 0)
+ return -1;
+
+ *hdr_r = view->map->hdr;
+ return 0;
}
int mail_index_lookup(struct mail_index_view *view, uint32_t seq,
@@ -226,6 +229,9 @@
return 0;
}
+ if (last_uid >= view->map->hdr->next_uid)
+ last_uid = view->map->hdr->next_uid-1;
+
/* optimization - binary lookup only from right side: */
*last_seq_r = mail_index_bsearch_uid(view, last_uid, &left_idx, -1);
i_assert(*last_seq_r >= *first_seq_r);
Index: mail-index.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index.c,v
retrieving revision 1.106
retrieving revision 1.107
diff -u -d -r1.106 -r1.107
--- mail-index.c 27 Apr 2004 20:25:53 -0000 1.106
+++ mail-index.c 28 Apr 2004 00:21:00 -0000 1.107
@@ -4,6 +4,7 @@
#include "buffer.h"
#include "file-lock.h"
#include "mmap-util.h"
+#include "read-full.h"
#include "write-full.h"
#include "mail-index-private.h"
#include "mail-transaction-log.h"
@@ -11,6 +12,9 @@
#include <stdio.h>
#include <stddef.h>
#include <time.h>
+#include <sys/stat.h>
+
+static int mail_index_try_open_only(struct mail_index *index);
struct mail_index *mail_index_alloc(const char *dir, const char *prefix)
{
@@ -35,23 +39,22 @@
}
static int mail_index_check_quick_header(struct mail_index *index,
- struct mail_index_map *map,
- const struct mail_index_header *hdr)
+ struct mail_index_map *map)
{
- if ((hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
+ if ((map->hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
/* either a crash or we've already complained about it */
return -1;
}
- if (map->mmap_used_size > map->mmap_size) {
+ if (map->file_used_size > map->file_size) {
map->records_count =
- (map->mmap_size - hdr->header_size) /
+ (map->file_size - map->hdr->header_size) /
sizeof(struct mail_index_record);
- map->mmap_used_size = map->mmap_size;
+ map->file_used_size = map->file_size;
mail_index_set_error(index, "Corrupted index file %s: "
"messages_count too large (%u > %u)",
- index->filepath, hdr->messages_count,
+ index->filepath, map->hdr->messages_count,
map->records_count);
return 0;
}
@@ -60,9 +63,9 @@
}
static int mail_index_check_header(struct mail_index *index,
- struct mail_index_map *map,
- const struct mail_index_header *hdr)
+ struct mail_index_map *map)
{
+ const struct mail_index_header *hdr = map->hdr;
unsigned char compat_data[3];
int ret;
@@ -83,7 +86,7 @@
return -1;
}
- if ((ret = mail_index_check_quick_header(index, map, hdr)) <= 0)
+ if ((ret = mail_index_check_quick_header(index, map)) <= 0)
return ret;
/* following some extra checks that only take a bit of CPU */
@@ -107,47 +110,144 @@
return 1;
}
-void mail_index_unmap(struct mail_index *index, struct mail_index_map *map)
+static void mail_index_map_clear(struct mail_index *index,
+ struct mail_index_map *map)
{
- if (--map->refcount > 0)
- return;
-
if (map->buffer != NULL) {
i_assert(map->mmap_base == NULL);
buffer_free(map->buffer);
- } else {
+ map->buffer = NULL;
+ } else if (map->mmap_base != NULL) {
i_assert(map->buffer == NULL);
- if (munmap(map->mmap_base, map->mmap_size) < 0)
+ if (munmap(map->mmap_base, map->file_size) < 0)
mail_index_set_syscall_error(index, "munmap()");
+ map->mmap_base = NULL;
+ }
+
+ if (map->refcount > 0) {
+ map->file_size = 0;
+ map->file_used_size = 0;
+ map->hdr = NULL;
+ map->records = NULL;
+ map->records_count = 0;
}
+}
+
+void mail_index_unmap(struct mail_index *index, struct mail_index_map *map)
+{
+ if (--map->refcount > 0)
+ return;
+
+ i_assert(map->refcount == 0);
+ mail_index_map_clear(index, map);
i_free(map);
}
-int mail_index_map(struct mail_index *index, int force)
+static void mail_index_unmap_forced(struct mail_index *index,
+ struct mail_index_map *map)
{
- const struct mail_index_header *hdr;
- struct mail_index_map *map;
- size_t used_size;
+ mail_index_map_clear(index, map);
+ mail_index_unmap(index, map);
+}
+
+static int mail_index_read_map(struct mail_index *index,
+ struct mail_index_map *map)
+{
+ struct stat st;
+ void *data;
+ size_t file_size;
int ret;
- if (!index->use_mmap) {
- // FIXME
+ if (fstat(index->fd, &st) < 0) {
+ if (errno == ESTALE)
+ return 0;
+ mail_index_set_syscall_error(index, "fstat()");
return -1;
}
+ file_size = st.st_size;
+
+ if (map->buffer == NULL) {
+ map->buffer = buffer_create_dynamic(default_pool,
+ file_size, (size_t)-1);
+ }
+
+ /* @UNSAFE */
+ buffer_set_used_size(map->buffer, 0);
+ data = buffer_append_space_unsafe(map->buffer, file_size);
+ ret = pread_full(index->fd, data, file_size, 0);
+ if (ret < 0) {
+ if (errno == ESTALE)
+ return 0;
+ mail_index_set_syscall_error(index, "pread_full()");
+ return -1;
+ }
+ if (ret == 0) {
+ mail_index_set_error(index,
+ "Unexpected EOF while reading index file");
+ return -1;
+ }
+
+ map->file_size = file_size;
+ return 1;
+}
+
+static int mail_index_read_map_with_retry(struct mail_index *index,
+ struct mail_index_map *map)
+{
+ int i, ret;
+
+ for (i = 0; i < INDEX_ESTALE_RETRY_COUNT; i++) {
+ ret = mail_index_read_map(index, map);
+ if (ret != 0)
+ return ret;
+
+ /* ESTALE - reopen index file */
+ if (close(index->fd) < 0)
+ mail_index_set_syscall_error(index, "close()");
+ index->fd = -1;
+
+ ret = mail_index_try_open_only(index);
+ if (ret <= 0) {
+ if (ret == 0) {
+ /* the file was lost */
+ errno = ENOENT;
+ mail_index_set_syscall_error(index, "open()");
+ }
+ return -1;
+ }
+ }
+
+ /* Too many ESTALE retries */
+ mail_index_set_syscall_error(index, "read_map()");
+ return -1;
+}
+
+int mail_index_map(struct mail_index *index, int force)
+{
+ struct mail_index_map *map;
+ size_t used_size;
+ void *base;
+ int ret;
if (index->map != NULL) {
map = index->map;
/* see if re-mmaping is needed (file has grown) */
- hdr = map->mmap_base;
- used_size = hdr->header_size +
- hdr->messages_count * sizeof(struct mail_index_record);
- if (map->mmap_size >= used_size && !force)
+ used_size = map->hdr->header_size +
+ map->hdr->messages_count *
+ sizeof(struct mail_index_record);
+ if (map->file_size >= used_size && !force) {
+ /* update log file position in case it has changed */
+ map->log_file_seq = map->hdr->log_file_seq;
+ map->log_file_offset = map->hdr->log_file_offset;
return 1;
+ }
- if (munmap(map->mmap_base, map->mmap_size) < 0)
- mail_index_set_syscall_error(index, "munmap()");
- map->mmap_base = NULL;
+ if (map->mmap_base != NULL) {
+ if (munmap(map->mmap_base, map->file_size) < 0)
+ mail_index_set_syscall_error(index, "munmap()");
+ map->mmap_base = NULL;
+ }
} else {
map = i_new(struct mail_index_map, 1);
map->refcount = 1;
@@ -156,44 +256,63 @@
index->hdr = NULL;
index->map = NULL;
- map->mmap_base = mmap_ro_file(index->fd, &map->mmap_size);
- if (map->mmap_base == MAP_FAILED) {
- map->mmap_base = NULL;
- mail_index_set_syscall_error(index, "mmap()");
- mail_index_unmap(index, map);
- return -1;
+ /* make sure if we fail we don't try to access anything outside the
+ buffer */
+ map->file_size = 0;
+ map->file_used_size = 0;
+
+ if (!index->mmap_disable) {
+ map->mmap_base = index->lock_type != F_WRLCK ?
+ mmap_ro_file(index->fd, &map->file_size) :
+ mmap_rw_file(index->fd, &map->file_size);
+ if (map->mmap_base == MAP_FAILED) {
+ map->mmap_base = NULL;
+ mail_index_set_syscall_error(index, "mmap()");
+ mail_index_unmap_forced(index, map);
+ return -1;
+ }
+ } else {
+ if (mail_index_read_map_with_retry(index, map) < 0) {
+ mail_index_unmap_forced(index, map);
+ return -1;
+ }
}
- if (map->mmap_size < MAIL_INDEX_HEADER_MIN_SIZE) {
+ if (map->file_size < MAIL_INDEX_HEADER_MIN_SIZE) {
mail_index_set_error(index, "Corrupted index file %s: "
"File too small (%"PRIuSIZE_T")",
- index->filepath, map->mmap_size);
- mail_index_unmap(index, map);
+ index->filepath, map->file_size);
+ mail_index_unmap_forced(index, map);
return 0;
}
- hdr = map->mmap_base;
- if (hdr->header_size < sizeof(*hdr)) {
+ base = !MAIL_INDEX_MAP_IS_IN_MEMORY(map) ? map->mmap_base :
+ buffer_get_modifyable_data(map->buffer, NULL);
+ map->hdr = base;
+
+ if (map->hdr->header_size < sizeof(*map->hdr)) {
/* header smaller than ours, make a copy so our newer headers
won't have garbage in them */
- memcpy(&map->hdr_copy, hdr, hdr->header_size);
- hdr = &map->hdr_copy;
+ memcpy(&map->hdr_copy, map->hdr, map->hdr->header_size);
+ map->hdr = &map->hdr_copy;
}
- map->hdr = map->mmap_base;
- map->records = PTR_OFFSET(map->mmap_base, hdr->header_size);
- map->records_count = hdr->messages_count;
- map->mmap_used_size = hdr->header_size +
+ map->records = PTR_OFFSET(base, map->hdr->header_size);
+ map->records_count = map->hdr->messages_count;
+ map->file_used_size = map->hdr->header_size +
map->records_count * sizeof(struct mail_index_record);
- ret = mail_index_check_header(index, map, hdr);
+ ret = mail_index_check_header(index, map);
if (ret < 0) {
- mail_index_unmap(index, map);
+ mail_index_unmap_forced(index, map);
return 0;
}
if (ret == 0)
index->fsck = TRUE;
+ map->log_file_seq = map->hdr->log_file_seq;
+ map->log_file_offset = map->hdr->log_file_offset;
+
index->hdr = map->mmap_base;
index->map = map;
return 1;
@@ -252,22 +371,10 @@
int mail_index_write_header(struct mail_index *index,
const struct mail_index_header *hdr)
{
- if (index->use_mmap) {
- if (mprotect(index->map->mmap_base, sizeof(*hdr),
- PROT_READ | PROT_WRITE) < 0) {
- mail_index_set_syscall_error(index, "mprotect()");
- return -1;
- }
-
+ if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
memcpy(index->map->mmap_base, hdr, sizeof(*hdr));
if (msync(index->map->mmap_base, sizeof(*hdr), MS_SYNC) < 0)
return mail_index_set_syscall_error(index, "msync()");
-
- if (mprotect(index->map->mmap_base, sizeof(*hdr),
- PROT_READ) < 0) {
- mail_index_set_syscall_error(index, "mprotect()");
- return -1;
- }
} else {
if (pwrite_full(index->fd, hdr, sizeof(*hdr), 0) < 0) {
mail_index_set_syscall_error(index, "pwrite_full()");
@@ -314,7 +421,7 @@
hdr->log_file_seq = seq;
hdr->log_file_offset = offset;
- ret = mail_index_try_open(index);
+ ret = mail_index_try_open(index, NULL);
if (ret != 0) {
mail_transaction_log_sync_unlock(index->log);
return ret;
@@ -358,15 +465,24 @@
return 1;
}
-int mail_index_try_open(struct mail_index *index)
+static int mail_index_try_open_only(struct mail_index *index)
{
- unsigned int lock_id;
- int ret;
+ int i;
- index->fd = open(index->filepath, O_RDWR);
- if (index->fd == -1 && errno == EACCES) {
- index->fd = open(index->filepath, O_RDONLY);
- index->readonly = TRUE;
+ for (i = 0; i < 3; i++) {
+ index->fd = open(index->filepath, O_RDWR);
+ if (index->fd == -1 && errno == EACCES) {
+ index->fd = open(index->filepath, O_RDONLY);
+ index->readonly = TRUE;
+ }
+ if (index->fd != -1 || errno != ESTALE)
+ break;
+
+ /* May happen with some OSes with NFS. Try again, although
+ there's still a race condition with another computer
+ creating the index file again. However, we can't try forever
+ as ESTALE happens also if index directory has been deleted
+ from server.. */
}
if (index->fd == -1) {
if (errno != ENOENT)
@@ -374,29 +490,51 @@
/* have to create it */
return 0;
- } else {
- if (mail_index_lock_shared(index, FALSE, &lock_id) < 0)
- return -1;
- ret = mail_index_map(index, FALSE);
- mail_index_unlock(index, lock_id);
+ }
+ return 1;
+}
- if (ret == 0) {
- /* it's corrupted - recreate it */
- (void)close(index->fd);
- index->fd = -1;
- }
+int mail_index_try_open(struct mail_index *index, unsigned int *lock_id_r)
+{
+ unsigned int lock_id;
+ int ret;
+
+ if (lock_id_r != NULL)
+ *lock_id_r = 0;
+
+ ret = mail_index_try_open_only(index);
+ if (ret <= 0)
return ret;
+
+ if (mail_index_lock_shared(index, FALSE, &lock_id) < 0)
+ return -1;
+ ret = mail_index_map(index, FALSE);
+ if (ret == 0) {
+ /* it's corrupted - recreate it */
+ mail_index_unlock(index, lock_id);
+ if (lock_id_r != NULL)
+ *lock_id_r = 0;
+
+ (void)close(index->fd);
+ index->fd = -1;
+ } else {
+ if (lock_id_r != NULL)
+ *lock_id_r = lock_id;
+ else
+ mail_index_unlock(index, lock_id);
}
+ return ret;
}
static int
mail_index_open2(struct mail_index *index, enum mail_index_open_flags flags)
{
- struct mail_index_header hdr;
+ struct mail_index_header hdr;
+ unsigned int lock_id = 0;
int ret;
- ret = mail_index_try_open(index);
- if (ret == 1)
+ ret = mail_index_try_open(index, &lock_id);
+ if (ret > 0)
hdr = *index->hdr;
else if (ret == 0) {
/* doesn't exist, or corrupted */
@@ -412,6 +550,9 @@
index->log = mail_transaction_log_open_or_create(index);
if (index->log == NULL)
return -1;
+
+ if (lock_id != 0)
+ mail_index_unlock(index, lock_id);
return index->fd != -1 ? 1 : mail_index_create(index, &hdr);
}
@@ -430,7 +571,10 @@
index->nodiskspace = FALSE;
index->index_lock_timeout = FALSE;
index->log_locked = FALSE;
- index->use_mmap = (flags & MAIL_INDEX_OPEN_FLAG_NO_MMAP) == 0;
+ index->mmap_disable =
+ (flags & MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE) != 0;
+ index->mmap_no_write =
+ (flags & MAIL_INDEX_OPEN_FLAG_MMAP_NO_WRITE) != 0;
index->readonly = FALSE;
index->filepath = i_strconcat(index->dir, "/",
@@ -508,6 +652,11 @@
return -1;
}
+void mail_index_set_inconsistent(struct mail_index *index)
+{
+ index->indexid = 0;
+}
+
int mail_index_mark_corrupted(struct mail_index *index)
{
struct mail_index_header hdr;
@@ -516,6 +665,15 @@
(index->hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0)
return 0;
+ /* make sure we can write the header */
+ if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
+ if (mprotect(index->map->mmap_base, sizeof(hdr),
+ PROT_READ | PROT_WRITE) < 0) {
+ mail_index_set_syscall_error(index, "mprotect()");
+ return -1;
+ }
+ }
+
hdr = *index->hdr;
hdr.flags |= MAIL_INDEX_HDR_FLAG_CORRUPTED;
if (mail_index_write_header(index, &hdr) < 0)
@@ -523,6 +681,8 @@
if (fsync(index->fd) < 0)
return mail_index_set_syscall_error(index, "fsync()");
+
+ mail_index_set_inconsistent(index);
return 0;
}
Index: mail-index.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-index.h,v
retrieving revision 1.98
retrieving revision 1.99
diff -u -d -r1.98 -r1.99
--- mail-index.h 27 Apr 2004 20:25:53 -0000 1.98
+++ mail-index.h 28 Apr 2004 00:21:00 -0000 1.99
@@ -19,9 +19,13 @@
delay opening cache/log files unless they're needed. */
MAIL_INDEX_OPEN_FLAG_FAST = 0x02,
/* Don't try to mmap() index files */
- MAIL_INDEX_OPEN_FLAG_NO_MMAP = 0x04,
+ MAIL_INDEX_OPEN_FLAG_MMAP_DISABLE = 0x04,
+ /* Don't try to write() to mmap()ed index files. Required for the few
+ OSes that don't have unified buffer cache
+ (currently OpenBSD <= 3.5) */
+ MAIL_INDEX_OPEN_FLAG_MMAP_NO_WRITE = 0x08,
/* Use only dotlocking, no fcntl() */
- MAIL_INDEX_OPEN_FLAG_USE_DOTLOCKS = 0x08
+ MAIL_INDEX_OPEN_FLAG_USE_DOTLOCKS = 0x10
};
enum mail_index_header_compat_flags {
@@ -203,6 +207,8 @@
/* Returns -1 if error, 0 if sync is finished, 1 if record was filled. */
int mail_index_sync_next(struct mail_index_sync_ctx *ctx,
struct mail_index_sync_rec *sync_rec);
+/* Returns 1 if there's more to sync, 0 if not. */
+int mail_index_sync_have_more(struct mail_index_sync_ctx *ctx);
/* End synchronization by unlocking the index and closing the view. */
int mail_index_sync_end(struct mail_index_sync_ctx *ctx);
@@ -228,9 +234,8 @@
void mail_index_view_sync_end(struct mail_index_view_sync_ctx *ctx);
/* Returns the index header. */
-const struct mail_index_header *
-mail_index_get_header(struct mail_index_view *view);
-
+int mail_index_get_header(struct mail_index_view *view,
+ const struct mail_index_header **hdr_r);
/* Returns the given message. */
int mail_index_lookup(struct mail_index_view *view, uint32_t seq,
const struct mail_index_record **rec_r);
Index: mail-transaction-log.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-index/mail-transaction-log.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- mail-transaction-log.c 27 Apr 2004 20:25:53 -0000 1.1
+++ mail-transaction-log.c 28 Apr 2004 00:21:00 -0000 1.2
@@ -286,13 +286,13 @@
if (mail_index_write_header(index, &idx_hdr) < 0)
ret = -1;
}
+ hdr.file_seq = index->hdr->log_file_seq;
mail_index_unlock(index, lock_id);
if (ret <= 0) {
(void)file_dotlock_delete(path, fd);
return -1;
}
- hdr.file_seq = index->hdr->log_file_seq;
} else {
/* creating new index file */
hdr.file_seq = index->hdr->log_file_seq+1;
@@ -555,9 +555,10 @@
int mail_transaction_log_file_map(struct mail_transaction_log_file *file,
uoff_t start_offset, uoff_t end_offset)
{
+ struct mail_index *index = file->log->index;
size_t size;
struct stat st;
- int ret;
+ int ret, use_mmap;
i_assert(start_offset <= end_offset);
@@ -566,6 +567,11 @@
return 0;
}
+ /* with mmap_no_write we could alternatively just write to log with
+ msync() rather than pwrite(). that'd cause slightly more disk I/O,
+ so rather use more memory. */
+ use_mmap = !index->mmap_disable && !index->mmap_no_write;
+
if (file->buffer != NULL && file->buffer_offset <= start_offset) {
/* see if we already have it */
size = buffer_get_used_size(file->buffer);
@@ -574,8 +580,8 @@
}
if (fstat(file->fd, &st) < 0) {
- mail_index_file_set_syscall_error(file->log->index,
- file->filepath, "fstat()");
+ mail_index_file_set_syscall_error(index, file->filepath,
+ "fstat()");
return -1;
}
@@ -587,14 +593,13 @@
}
if (file->buffer != NULL &&
- (file->mmap_base != NULL || file->log->index->use_mmap)) {
+ (file->mmap_base != NULL || use_mmap)) {
buffer_free(file->buffer);
file->buffer = NULL;
}
if (file->mmap_base != NULL) {
if (munmap(file->mmap_base, file->mmap_size) < 0) {
- mail_index_file_set_syscall_error(file->log->index,
- file->filepath,
+ mail_index_file_set_syscall_error(index, file->filepath,
"munmap()");
}
file->mmap_base = NULL;
@@ -619,7 +624,7 @@
return -1;
}
- if (!file->log->index->use_mmap) {
+ if (!use_mmap) {
ret = mail_transaction_log_file_read(file, start_offset);
if (ret <= 0) {
/* make sure we don't leave ourself in
@@ -640,8 +645,8 @@
MAP_SHARED, file->fd, 0);
if (file->mmap_base == MAP_FAILED) {
file->mmap_base = NULL;
- mail_index_file_set_syscall_error(file->log->index,
- file->filepath, "mmap()");
+ mail_index_file_set_syscall_error(index, file->filepath,
+ "mmap()");
return -1;
}
file->buffer = buffer_create_const_data(default_pool, file->mmap_base,
More information about the dovecot-cvs mailing list