dovecot: Initial code to support expunging from squat indexes, p...

dovecot at dovecot.org dovecot at dovecot.org
Sun Feb 3 22:44:13 EET 2008


details:   http://hg.dovecot.org/dovecot/rev/f5f77a3ae203
changeset: 7210:f5f77a3ae203
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Feb 03 22:44:09 2008 +0200
description:
Initial code to support expunging from squat indexes, probably still buggy.

diffstat:

6 files changed, 316 insertions(+), 41 deletions(-)
src/plugins/fts-squat/fts-backend-squat.c |   33 +++
src/plugins/fts-squat/squat-test.c        |    2 
src/plugins/fts-squat/squat-trie.c        |  259 +++++++++++++++++++++++++----
src/plugins/fts-squat/squat-trie.h        |    7 
src/plugins/fts-squat/squat-uidlist.c     |   50 +++++
src/plugins/fts-squat/squat-uidlist.h     |    6 

diffs (truncated from 511 to 300 lines):

diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/fts-backend-squat.c
--- a/src/plugins/fts-squat/fts-backend-squat.c	Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Sun Feb 03 22:44:09 2008 +0200
@@ -102,14 +102,45 @@ fts_backend_squat_build_more(struct fts_
 				     data, size);
 }
 
+static int get_all_msg_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids)
+{
+	struct mailbox_transaction_context *t;
+	struct mail_search_context *search_ctx;
+	struct mail_search_arg search_arg;
+	struct mail *mail;
+	int ret = 0;
+
+	t = mailbox_transaction_begin(box, 0);
+	memset(&search_arg, 0, sizeof(search_arg));
+	search_arg.type = SEARCH_ALL;
+
+	mail = mail_alloc(t, 0, NULL);
+	search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
+	while ((ret = mailbox_search_next(search_ctx, mail)) > 0)
+		seq_range_array_add(uids, 0, mail->uid);
+	if (mailbox_search_deinit(&search_ctx) < 0)
+		ret = -1;
+	mail_free(&mail);
+	(void)mailbox_transaction_commit(&t);
+	return ret;
+}
+
 static int
 fts_backend_squat_build_deinit(struct fts_backend_build_context *_ctx)
 {
 	struct squat_fts_backend_build_context *ctx =
 		(struct squat_fts_backend_build_context *)_ctx;
+	ARRAY_TYPE(seq_range) uids;
 	int ret;
 
-	ret = squat_trie_build_deinit(&ctx->build_ctx);
+	i_array_init(&uids, 1024);
+	if (get_all_msg_uids(ctx->ctx.backend->box, &uids) < 0)
+		ret = squat_trie_build_deinit(&ctx->build_ctx, NULL);
+	else {
+		seq_range_array_invert(&uids, 1, (uint32_t)-2);
+		ret = squat_trie_build_deinit(&ctx->build_ctx, &uids);
+	}
+	array_free(&uids);
 	i_free(ctx);
 	return ret;
 }
diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/squat-test.c
--- a/src/plugins/fts-squat/squat-test.c	Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/squat-test.c	Sun Feb 03 22:44:09 2008 +0200
@@ -129,7 +129,7 @@ int main(int argc ATTR_UNUSED, char *arg
 		}
 	}
 	buffer_free(&valid);
-	if (squat_trie_build_deinit(&build_ctx) < 0)
+	if (squat_trie_build_deinit(&build_ctx, NULL) < 0)
 		ret = -1;
 	if (ret < 0) {
 		printf("build broken\n");
diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/squat-trie.c
--- a/src/plugins/fts-squat/squat-trie.c	Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/squat-trie.c	Sun Feb 03 22:44:09 2008 +0200
@@ -43,6 +43,7 @@ struct squat_trie_build_context {
 
 struct squat_trie_iterate_node {
 	struct squat_node *node;
+	ARRAY_TYPE(seq_range) shifts;
 	unsigned int idx;
 };
 
@@ -1013,8 +1014,16 @@ static int
 static int
 squat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx)
 {
+	struct squat_trie_iterate_node *nodes;
+	unsigned int i, count;
 	int ret = ctx->failed ? -1 : 0;
 
+	if (array_is_created(&ctx->cur.shifts)) {
+		nodes = array_get_modifiable(&ctx->parents, &count);
+		for (i = 0; i < count; i++)
+			array_free(&nodes[i].shifts);
+		array_free(&ctx->cur.shifts);
+	}
 	array_free(&ctx->parents);
 	i_free(ctx);
 	return ret;
@@ -1033,70 +1042,253 @@ squat_trie_iterate_first(struct squat_tr
 }
 
 static struct squat_node *
-squat_trie_iterate_next(struct squat_trie_iterate_context *ctx)
+squat_trie_iterate_next(struct squat_trie_iterate_context *ctx,
+			ARRAY_TYPE(seq_range) *shifts_r)
 {
 	struct squat_trie_iterate_node *iter_nodes;
 	struct squat_node *children;
-	unsigned int count;
-
-	while (ctx->cur.idx == ctx->cur.node->child_count) {
+	unsigned int count, shift_count = 0;
+
+	while (ctx->cur.idx == ctx->cur.node->child_count ||
+	       ctx->cur.node->uid_list_idx == 0)
+	{
 		iter_nodes = array_get_modifiable(&ctx->parents, &count);
 		if (count == 0)
 			return NULL;
+
+		if (array_is_created(&ctx->cur.shifts))
+			array_free(&ctx->cur.shifts);
 		ctx->cur = iter_nodes[count-1];
 		array_delete(&ctx->parents, count-1, 1);
 	}
 
-	ctx->cur.idx++;
+	*shifts_r = ctx->cur.shifts;
+	if (array_is_created(&ctx->cur.shifts)) {
+		shift_count = array_count(&ctx->cur.shifts);
+		i_assert(shift_count > 0);
+	}
+
+	children = NODE_CHILDREN_NODES(ctx->cur.node);
+	while (children[ctx->cur.idx++].uid_list_idx == 0) {
+		if (ctx->cur.idx == ctx->cur.node->child_count) {
+			/* no more non-empty children in this node */
+			return squat_trie_iterate_next(ctx, shifts_r);
+		}
+	}
 	array_append(&ctx->parents, &ctx->cur, 1);
-	children = NODE_CHILDREN_NODES(ctx->cur.node);
 	ctx->cur.node = &children[ctx->cur.idx-1];
 	ctx->cur.idx = 0;
+	if (shift_count != 0)
+		i_array_init(&ctx->cur.shifts, shift_count);
 	return ctx->cur.node;
 }
 
+static void
+squat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr,
+				   ARRAY_TYPE(seq_range) *child_shifts,
+				   ARRAY_TYPE(seq_range) *uids_arr,
+				   bool do_shifts)
+{
+	const struct seq_range *shifts;
+	struct seq_range *uids, shift;
+	unsigned int i, uid_idx, uid_count, shift_count;
+	uint32_t child_shift_seq1, child_shift_count, seq_high;
+	unsigned int shift_sum = 0, child_sum = 0;
+
+	/* self shift */
+	uids = array_get_modifiable(uids_arr, &uid_count);
+	shifts = array_get(shifts_arr, &shift_count);
+	for (i = 0, uid_idx = 0, seq_high = 0;; ) {
+		/* skip UID ranges until we skip/overlap shifts */
+		while (uid_idx < uid_count &&
+		       (i == shift_count ||
+			I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2))
+		{
+			i_assert(uids[uid_idx].seq1 >= shift_sum);
+			uids[uid_idx].seq1 -= shift_sum;
+			uids[uid_idx].seq2 -= shift_sum;
+			child_sum += uids[uid_idx].seq2 -
+				uids[uid_idx].seq1 + 1;
+
+			if (uid_idx > 0 &&
+			    uids[uid_idx-1].seq2 == uids[uid_idx].seq1 - 1) {
+				/* we can merge this and the previous range */
+				uids[uid_idx-1].seq2 = uids[uid_idx].seq2;
+				array_delete(uids_arr, uid_idx, 1);
+				uids = array_get_modifiable(uids_arr,
+							    &uid_count);
+			} else {
+				uid_idx++;
+			}
+		}
+		if (uid_idx == uid_count)
+			break;
+
+		shift.seq1 = I_MAX(shifts[i].seq1, seq_high);
+		shift.seq2 = shifts[i].seq2;
+		if (shift.seq2 < uids[uid_idx].seq1) {
+			/* shift is entirely before UID range */
+			shift_sum += shift.seq2 - shift.seq1 + 1;
+			i++;
+		} else {
+			/* handle shifts before UID range */
+			if (shift.seq1 < uids[uid_idx].seq1) {
+				shift_sum += uids[uid_idx].seq1 - shift.seq1;
+				shift.seq1 = uids[uid_idx].seq1;
+			}
+			/* update child shifts */
+			child_shift_seq1 = child_sum +
+				shift.seq1 - uids[uid_idx].seq1;
+			child_shift_count =
+				I_MIN(shift.seq2, uids[uid_idx].seq2) -
+				shift.seq1 + 1;
+			seq_range_array_add_range(child_shifts,
+						  child_shift_seq1,
+						  child_shift_seq1 +
+						  child_shift_count - 1);
+			child_sum += child_shift_count;
+
+			/* if the shifts continue after the UID range,
+			   treat it in the next loop iteration */
+			if (shift.seq2 <= uids[uid_idx].seq2)
+				i++;
+			else
+				seq_high = uids[uid_idx].seq2 + 1;
+
+			/* update UIDs - no matter where within the UID range
+			   the shifts happened, the result is the same:
+			   shift number of UIDs are removed, and the rest
+			   are decreased by shift_sum.
+
+			      123     uids child_shifts
+			   a) s    -> 12   1
+			   b)  s   -> 12   2
+			   c)   s  -> 12   3
+			*/
+			if (uids[uid_idx].seq1 +
+			    child_shift_count > uids[uid_idx].seq2) {
+				/* removed completely */
+				array_delete(uids_arr, uid_idx, 1);
+				uids = array_get_modifiable(uids_arr,
+							    &uid_count);
+			} else {
+				/* the next loop iteration fixes the UIDs */
+				uids[uid_idx].seq1 += child_shift_count;
+			}
+			shift_sum += child_shift_count;
+		}
+		if (!do_shifts) {
+			/* root node - UIDs are only removed, not shifted */
+			shift_sum = 0;
+		}
+	}
+}
+
 static int
-squat_trie_renumber_uidlists(struct squat_trie_build_context *ctx,
-			     bool compress)
-{
-	struct squat_trie_iterate_context *iter;
+squat_trie_expunge_uidlists(struct squat_trie_build_context *ctx,
+			    struct squat_uidlist_rebuild_context *rebuild_ctx,
+			    struct squat_trie_iterate_context *iter,
+			    const ARRAY_TYPE(seq_range) *expunged_uids)
+{
 	struct squat_node *node;
-	struct squat_uidlist_rebuild_context *rebuild_ctx;
+	ARRAY_TYPE(seq_range) uid_range, shifts;
+	bool shift = FALSE;
+	int ret = 0;
+
+	node = squat_trie_iterate_first(iter);
+	if (node->uid_list_idx == 0)
+		return 0;
+
+	i_array_init(&uid_range, 1024);
+	i_array_init(&shifts, array_count(expunged_uids));
+	array_append_array(&shifts, expunged_uids);
+
+	while (node != NULL) {
+		i_assert(node->uid_list_idx != 0);
+		array_clear(&uid_range);
+		if (squat_uidlist_get_seqrange(ctx->trie->uidlist,
+					       node->uid_list_idx,
+					       &uid_range) < 0) {
+			ret = -1;
+			break;
+		}
+		squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts,
+						   &uid_range, shift);
+		node->uid_list_idx =
+			squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range);
+		if (node->uid_list_idx == 0) {
+			node->child_count = 0;
+			node->next_uid = 0;
+		}
+		node = squat_trie_iterate_next(iter, &shifts);
+		i_assert(array_count(&shifts) > 0);
+		shift = TRUE;
+	}
+	array_free(&uid_range);
+	return ret;
+}
+
+static int
+squat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx,
+			      struct squat_uidlist_rebuild_context *rebuild_ctx,
+			      struct squat_trie_iterate_context *iter)
+{
+	struct squat_node *node;
+	ARRAY_TYPE(seq_range) shifts;
 	ARRAY_TYPE(uint32_t) uids;
-	uint32_t new_uid_list_idx, max_count=0;


More information about the dovecot-cvs mailing list