dovecot-2.2: lib-index: Replaced cache's deleted_space with deleted_record_count.

dovecot at dovecot.org dovecot at dovecot.org
Sat Nov 3 20:25:55 EET 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/debecd057f9c
changeset: 15374:debecd057f9c
user:      Timo Sirainen <tss at iki.fi>
date:      Sat Nov 03 20:25:17 2012 +0200
description:
lib-index: Replaced cache's deleted_space with deleted_record_count.
This way, when expunging messages, we don't need to actually read through the
cache file and calculate how many bytes will be freed; we can simply
increase the deleted_record_count by 1. We'll still compress the cache file
after approximately 20% of the space (records) has been deleted.

This also converts the old "hole offset" header field to tracking the number
of records in the cache file. Otherwise we'd have to assume that all of the
messages in the mailbox have a cache record, but that isn't true for users who
have only temporary cache fields. This converted field cannot be trusted
completely, because nothing prevents an old Dovecot version from simply writing
a hole offset there and messing up the record counts. Because of this there
are a couple of extra sanity checks for it. (Unfortunately there wasn't any
easy way to simply add a new field to the header without breaking backwards
compatibility.)

diffstat:

 src/doveadm/doveadm-dump-index.c       |   4 +-
 src/lib-index/mail-cache-compress.c    |   6 ++-
 src/lib-index/mail-cache-private.h     |  15 ++++---
 src/lib-index/mail-cache-sync-update.c |   2 +-
 src/lib-index/mail-cache-transaction.c |  59 ++++++++++----------------------
 src/lib-index/mail-cache.c             |  61 +++++++++++++++++++++++----------
 6 files changed, 76 insertions(+), 71 deletions(-)

diffs (296 lines):

diff -r 8323e81785e2 -r debecd057f9c src/doveadm/doveadm-dump-index.c
--- a/src/doveadm/doveadm-dump-index.c	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/doveadm/doveadm-dump-index.c	Sat Nov 03 20:25:17 2012 +0200
@@ -325,9 +325,9 @@
 	       hdr->file_seq, unixdate2str(hdr->file_seq),
 	       hdr->file_seq - hdr->indexid);
 	printf("continued_record_count = %u\n", hdr->continued_record_count);
-	printf("hole_offset (unused) . = %u\n", hdr->unused_old_hole_offset);
+	printf("record_count ......... = %u\n", hdr->record_count);
 	printf("used_file_size (old) . = %u\n", hdr->backwards_compat_used_file_size);
-	printf("deleted_space ........ = %u\n", hdr->deleted_space);
+	printf("deleted_record_count . = %u\n", hdr->deleted_record_count);
 	printf("field_header_offset .. = %u (0x%08x nontranslated)\n",
 	       mail_index_offset_to_uint32(hdr->field_header_offset),
 	       hdr->field_header_offset);
diff -r 8323e81785e2 -r debecd057f9c src/lib-index/mail-cache-compress.c
--- a/src/lib-index/mail-cache-compress.c	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/lib-index/mail-cache-compress.c	Sat Nov 03 20:25:17 2012 +0200
@@ -163,7 +163,7 @@
 	struct mail_cache_record cache_rec;
 	struct ostream *output;
 	uint32_t message_count, seq, first_new_seq, ext_offset;
-	unsigned int i, used_fields_count, orig_fields_count;
+	unsigned int i, used_fields_count, orig_fields_count, record_count;
 	time_t max_drop_time;
 
 	view = mail_index_transaction_get_view(trans);
@@ -231,7 +231,7 @@
 	first_new_seq = mail_cache_get_first_new_seq(view);
 	message_count = mail_index_view_get_messages_count(view);
 
-	i_array_init(ext_offsets, message_count);
+	i_array_init(ext_offsets, message_count); record_count = 0;
 	for (seq = 1; seq <= message_count; seq++) {
 		if (mail_index_transaction_is_expunged(trans, seq)) {
 			array_append_zero(ext_offsets);
@@ -264,12 +264,14 @@
 			buffer_write(ctx.buffer, 0, &cache_rec,
 				     sizeof(cache_rec));
 			o_stream_nsend(output, ctx.buffer->data, cache_rec.size);
+			record_count++;
 		}
 
 		array_append(ext_offsets, &ext_offset, 1);
 	}
 	i_assert(orig_fields_count == cache->fields_count);
 
+	hdr.record_count = record_count;
 	hdr.field_header_offset = mail_index_uint32_to_offset(output->offset);
 	mail_cache_compress_get_fields(&ctx, used_fields_count);
 	o_stream_nsend(output, ctx.buffer->data, ctx.buffer->used);
diff -r 8323e81785e2 -r debecd057f9c src/lib-index/mail-cache-private.h
--- a/src/lib-index/mail-cache-private.h	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/lib-index/mail-cache-private.h	Sat Nov 03 20:25:17 2012 +0200
@@ -12,10 +12,10 @@
 #define MAIL_CACHE_FIELD_DROP_SECS (3600*24*30)
 
 /* Never compress the file if it's smaller than this */
-#define MAIL_CACHE_COMPRESS_MIN_SIZE (1024*50)
+#define MAIL_CACHE_COMPRESS_MIN_SIZE (1024*32)
 
-/* Compress the file when deleted space reaches n% of total size */
-#define MAIL_CACHE_COMPRESS_PERCENTAGE 20
+/* Compress the file when n% of records are deleted */
+#define MAIL_CACHE_COMPRESS_DELETE_PERCENTAGE 20
 
 /* Compress the file when n% of rows contain continued rows.
    200% means that there's 2 continued rows per record. */
@@ -47,9 +47,11 @@
 
 	uint32_t continued_record_count;
 
-	uint32_t unused_old_hole_offset;
+	/* NOTE: old versions used this for hole offset, so we can't fully
+	   rely on it */
+	uint32_t record_count;
 	uint32_t backwards_compat_used_file_size;
-	uint32_t deleted_space;
+	uint32_t deleted_record_count;
 
 	uint32_t field_header_offset;
 };
@@ -247,8 +249,7 @@
 void mail_cache_file_close(struct mail_cache *cache);
 int mail_cache_reopen(struct mail_cache *cache);
 
-/* Mark record in given offset to be deleted. */
-int mail_cache_delete(struct mail_cache *cache, uint32_t offset);
+void mail_cache_delete(struct mail_cache *cache);
 
 /* Notify the decision handling code that field was looked up for seq.
    This should be called even for fields that aren't currently in cache file */
diff -r 8323e81785e2 -r debecd057f9c src/lib-index/mail-cache-sync-update.c
--- a/src/lib-index/mail-cache-sync-update.c	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/lib-index/mail-cache-sync-update.c	Sat Nov 03 20:25:17 2012 +0200
@@ -95,6 +95,6 @@
 
 	if (!MAIL_CACHE_IS_UNUSABLE(cache) &&
 	    cache_file_seq == cache->hdr->file_seq)
-		(void)mail_cache_delete(cache, *cache_offset);
+		mail_cache_delete(cache);
 	return 0;
 }
diff -r 8323e81785e2 -r debecd057f9c src/lib-index/mail-cache-transaction.c
--- a/src/lib-index/mail-cache-transaction.c	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/lib-index/mail-cache-transaction.c	Sat Nov 03 20:25:17 2012 +0200
@@ -37,7 +37,7 @@
 	uint32_t prev_seq, min_seq;
 	size_t last_rec_pos;
 
-	uoff_t bytes_written;
+	unsigned int records_written;
 
 	unsigned int tried_compression:1;
 	unsigned int changes:1;
@@ -133,12 +133,12 @@
 
 	*_ctx = NULL;
 
-	if (ctx->bytes_written > 0) {
+	if (ctx->records_written > 0) {
 		/* we already wrote to the cache file. we can't (or don't want
 		   to) delete that data, so just mark it as deleted space */
 		if (mail_cache_transaction_lock(ctx) > 0) {
-			ctx->cache->hdr_copy.deleted_space +=
-				ctx->bytes_written;
+			ctx->cache->hdr_copy.deleted_record_count +=
+				ctx->records_written;
 			(void)mail_cache_unlock(ctx->cache);
 		}
 	}
@@ -347,7 +347,8 @@
 			/* link this record to previous one */
 			rec->prev_offset = prev_offset;
 			ctx->cache->hdr_copy.continued_record_count++;
-			ctx->cache->hdr_modified = TRUE;
+		} else {
+			ctx->cache->hdr_copy.record_count++;
 		}
 		*offsetp = write_offset;
 
@@ -355,6 +356,7 @@
 		rec = PTR_OFFSET(rec, rec->size);
 	}
 	array_free(&seq_offsets);
+	ctx->cache->hdr_modified = TRUE;
 	return 0;
 }
 
@@ -401,7 +403,7 @@
 		ret = -1;
 	else {
 		/* update records' cache offsets to index */
-		ctx->bytes_written += ctx->last_rec_pos;
+		ctx->records_written++;
 		ret = mail_cache_transaction_update_index(ctx, write_offset);
 	}
 	if (mail_cache_unlock(ctx->cache) < 0)
@@ -476,7 +478,7 @@
 			ret = -1;
 		else {
 			/* successfully wrote everything */
-			ctx->bytes_written = 0;
+			ctx->records_written = 0;
 		}
 		/* Here would be a good place to do fdatasync() to make sure
 		   everything is written before offsets are updated to index.
@@ -737,42 +739,17 @@
 	return mail_cache_field_exists(ctx->view, seq, field_idx) == 0;
 }
 
-static int mail_cache_delete_real(struct mail_cache *cache, uint32_t offset)
+void mail_cache_delete(struct mail_cache *cache)
 {
-	const struct mail_cache_record *rec;
-	struct mail_cache_loop_track loop_track;
-	int ret = 0;
-
 	i_assert(cache->locked);
 
-	/* we'll only update the deleted_space in header. we can't really
-	   do any actual deleting as other processes might still be using
-	   the data. also it's actually useful as some index views are still
-	   able to ask cached data from messages that have already been
+	/* we'll only update the deleted record count in the header. we can't
+	   really do any actual deleting as other processes might still be
+	   using the data. also it's actually useful as old index views are
+	   still able to ask cached data for messages that have already been
 	   expunged. */
-	memset(&loop_track, 0, sizeof(loop_track));
-	while (offset != 0 &&
-	       (ret = mail_cache_get_record(cache, offset, &rec)) == 0) {
-		if (mail_cache_track_loops(&loop_track, offset, rec->size)) {
-			mail_cache_set_corrupted(cache,
-						 "record list is circular");
-			return -1;
-		}
-
-		cache->hdr_copy.deleted_space += rec->size;
-		offset = rec->prev_offset;
-	}
-	return ret;
+	cache->hdr_copy.deleted_record_count++;
+	if (cache->hdr_copy.record_count > 0)
+		cache->hdr_copy.record_count--;
+	cache->hdr_modified = TRUE;
 }
-
-int mail_cache_delete(struct mail_cache *cache, uint32_t offset)
-{
-	int ret;
-
-	i_assert(cache->locked);
-	T_BEGIN {
-		ret = mail_cache_delete_real(cache, offset);
-	} T_END;
-	cache->hdr_modified = TRUE;
-	return ret;
-}
diff -r 8323e81785e2 -r debecd057f9c src/lib-index/mail-cache.c
--- a/src/lib-index/mail-cache.c	Sat Nov 03 19:37:47 2012 +0200
+++ b/src/lib-index/mail-cache.c	Sat Nov 03 20:25:17 2012 +0200
@@ -197,30 +197,55 @@
 {
 	const struct mail_cache_header *hdr = cache->hdr;
 	struct stat st;
-	unsigned int cont_percentage;
-	uoff_t file_size, max_del_space;
+	unsigned int msg_count;
+	unsigned int records_count, cont_percentage, delete_percentage;
+	bool want_compress = FALSE;
 
-	if (fstat(cache->fd, &st) < 0) {
-		if (!ESTALE_FSTAT(errno))
-			mail_cache_set_syscall_error(cache, "fstat()");
+	if (hdr->minor_version == 0) {
+		/* compress to get ourself into the new header version */
+		cache->need_compress_file_seq = hdr->file_seq;
 		return;
 	}
-	file_size = st.st_size;
 
-        cont_percentage = hdr->continued_record_count * 100 /
-		(cache->index->map->rec_map->records_count == 0 ? 1 :
-		 cache->index->map->rec_map->records_count);
-	if (cont_percentage >= MAIL_CACHE_COMPRESS_CONTINUED_PERCENTAGE &&
-	    file_size >= MAIL_CACHE_COMPRESS_MIN_SIZE) {
-		/* too many continued rows, compress */
-		cache->need_compress_file_seq = hdr->file_seq;
+	msg_count = cache->index->map->rec_map->records_count;
+	if (msg_count == 0)
+		records_count = 1;
+	else if (hdr->record_count == 0 || hdr->record_count > msg_count*2) {
+		/* probably not the real record_count, but hole offset that
+		   Dovecot <=v2.1 versions used to use in this position.
+		   we already checked that minor_version>0, but this could
+		   happen if old Dovecot was used to access mailbox after
+		   it had been updated. */
+		records_count = I_MAX(msg_count, 1);
+		cache->hdr_copy.record_count = msg_count;
+		cache->hdr_modified = TRUE;
+	} else {
+		records_count = hdr->record_count;
 	}
 
-	/* see if we've reached the max. deleted space in file */
-	max_del_space = file_size / 100 * MAIL_CACHE_COMPRESS_PERCENTAGE;
-	if (hdr->deleted_space >= max_del_space &&
-	    file_size >= MAIL_CACHE_COMPRESS_MIN_SIZE)
-		cache->need_compress_file_seq = hdr->file_seq;
+	cont_percentage = hdr->continued_record_count * 100 / records_count;
+	if (cont_percentage >= MAIL_CACHE_COMPRESS_CONTINUED_PERCENTAGE) {
+		/* too many continued rows, compress */
+		want_compress = TRUE;
+	}
+
+	delete_percentage = hdr->deleted_record_count * 100 /
+		(records_count + hdr->deleted_record_count);
+	if (delete_percentage >= MAIL_CACHE_COMPRESS_DELETE_PERCENTAGE) {
+		/* too many deleted records, compress */
+		want_compress = TRUE;
+	}
+
+	if (want_compress) {
+		if (fstat(cache->fd, &st) < 0) {
+			if (!ESTALE_FSTAT(errno))
+				mail_cache_set_syscall_error(cache, "fstat()");
+			return;
+		}
+		if (st.st_size >= MAIL_CACHE_COMPRESS_MIN_SIZE)
+			cache->need_compress_file_seq = hdr->file_seq;
+	}
+
 }
 
 static bool mail_cache_verify_header(struct mail_cache *cache,


More information about the dovecot-cvs mailing list