dovecot-2.2: mdbox rebuild: Use mail size instead of record size when guessing if the mails are the same.
dovecot at dovecot.org
dovecot at dovecot.org
Wed Jan 9 07:02:36 EET 2013
details: http://hg.dovecot.org/dovecot-2.2/rev/b0bd7b2ff1c5
changeset: 15613:b0bd7b2ff1c5
user: Timo Sirainen <tss at iki.fi>
date: Wed Jan 09 06:57:34 2013 +0200
description:
mdbox rebuild: Use mail size instead of record size when guessing if the mails are the same.
diffstat:
src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c | 26 ++++++++++-----
1 files changed, 17 insertions(+), 9 deletions(-)
diffs (72 lines):
diff -r df06c834d7e8 -r b0bd7b2ff1c5 src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c
--- a/src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c Wed Jan 09 05:35:20 2013 +0200
+++ b/src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c Wed Jan 09 06:57:34 2013 +0200
@@ -24,7 +24,8 @@
guid_128_t guid_128;
uint32_t file_id;
uint32_t offset;
- uint32_t size;
+ uint32_t rec_size;
+ uoff_t mail_size;
uint32_t map_uid;
uint16_t refcount;
@@ -106,9 +107,9 @@
if ((*m1)->offset > (*m2)->offset)
return 1;
- if ((*m1)->size < (*m2)->size)
+ if ((*m1)->rec_size < (*m2)->rec_size)
return -1;
- if ((*m1)->size > (*m2)->size)
+ if ((*m1)->rec_size > (*m2)->rec_size)
return 1;
return 0;
}
@@ -175,7 +176,8 @@
rec = p_new(ctx->pool, struct mdbox_rebuild_msg, 1);
rec->file_id = file_id;
rec->offset = offset;
- rec->size = file->input->v_offset - offset;
+ rec->rec_size = file->input->v_offset - offset;
+ rec->mail_size = dbox_file_get_plaintext_size(file);
mail_generate_guid_128_hash(guid, rec->guid_128);
i_assert(!guid_128_is_empty(rec->guid_128));
array_append(&ctx->msgs, &rec, 1);
@@ -183,9 +185,15 @@
old_rec = hash_table_lookup(ctx->guid_hash, rec->guid_128);
if (old_rec == NULL)
hash_table_insert(ctx->guid_hash, rec->guid_128, rec);
- else if (rec->size == old_rec->size) {
- /* duplicate. save this as a refcount=0 to map,
- so it will eventually be deleted. */
+ else if (rec->mail_size == old_rec->mail_size) {
+ /* two mails' GUID and size are the same, which quite
+ likely means that their contents are the same as
+ well. we'll compare the mail sizes instead of the
+ record sizes, because the records' metadata may
+ differ.
+
+ save this duplicate mail with refcount=0 to the map,
+ so it will eventually be purged. */
rec->seen_zero_ref_in_map = TRUE;
} else {
/* duplicate GUID, but not a duplicate message. */
@@ -290,7 +298,7 @@
rec.file_id = msgs[i]->file_id;
rec.offset = msgs[i]->offset;
- rec.size = msgs[i]->size;
+ rec.size = msgs[i]->rec_size;
msgs[i]->map_uid = next_uid++;
mail_index_append(ctx->atomic->sync_trans,
@@ -326,7 +334,7 @@
the (file_id, offset, size) triplet */
search_msg.file_id = rec.rec.file_id;
search_msg.offset = rec.rec.offset;
- search_msg.size = rec.rec.size;
+ search_msg.rec_size = rec.rec.size;
pos = bsearch(&search_msgp, msgs, count, sizeof(*msgs),
mdbox_rebuild_msg_offset_cmp);
if (pos == NULL || (*pos)->map_uid != 0) {
More information about the dovecot-cvs mailing list