dovecot: Initial code to support expunging from squat indexes, p...
dovecot at dovecot.org
dovecot at dovecot.org
Sun Feb 3 22:44:13 EET 2008
details: http://hg.dovecot.org/dovecot/rev/f5f77a3ae203
changeset: 7210:f5f77a3ae203
user: Timo Sirainen <tss at iki.fi>
date: Sun Feb 03 22:44:09 2008 +0200
description:
Initial code to support expunging from squat indexes, probably still buggy.
diffstat:
6 files changed, 316 insertions(+), 41 deletions(-)
src/plugins/fts-squat/fts-backend-squat.c | 33 +++
src/plugins/fts-squat/squat-test.c | 2
src/plugins/fts-squat/squat-trie.c | 259 +++++++++++++++++++++++++----
src/plugins/fts-squat/squat-trie.h | 7
src/plugins/fts-squat/squat-uidlist.c | 50 +++++
src/plugins/fts-squat/squat-uidlist.h | 6
diffs (truncated from 511 to 300 lines):
diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/fts-backend-squat.c
--- a/src/plugins/fts-squat/fts-backend-squat.c Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/fts-backend-squat.c Sun Feb 03 22:44:09 2008 +0200
@@ -102,14 +102,45 @@ fts_backend_squat_build_more(struct fts_
data, size);
}
+static int get_all_msg_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids)
+{
+ struct mailbox_transaction_context *t;
+ struct mail_search_context *search_ctx;
+ struct mail_search_arg search_arg;
+ struct mail *mail;
+ int ret = 0;
+
+ t = mailbox_transaction_begin(box, 0);
+ memset(&search_arg, 0, sizeof(search_arg));
+ search_arg.type = SEARCH_ALL;
+
+ mail = mail_alloc(t, 0, NULL);
+ search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
+ while ((ret = mailbox_search_next(search_ctx, mail)) > 0)
+ seq_range_array_add(uids, 0, mail->uid);
+ if (mailbox_search_deinit(&search_ctx) < 0)
+ ret = -1;
+ mail_free(&mail);
+ (void)mailbox_transaction_commit(&t);
+ return ret;
+}
+
static int
fts_backend_squat_build_deinit(struct fts_backend_build_context *_ctx)
{
struct squat_fts_backend_build_context *ctx =
(struct squat_fts_backend_build_context *)_ctx;
+ ARRAY_TYPE(seq_range) uids;
int ret;
- ret = squat_trie_build_deinit(&ctx->build_ctx);
+ i_array_init(&uids, 1024);
+ if (get_all_msg_uids(ctx->ctx.backend->box, &uids) < 0)
+ ret = squat_trie_build_deinit(&ctx->build_ctx, NULL);
+ else {
+ seq_range_array_invert(&uids, 1, (uint32_t)-2);
+ ret = squat_trie_build_deinit(&ctx->build_ctx, &uids);
+ }
+ array_free(&uids);
i_free(ctx);
return ret;
}
diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/squat-test.c
--- a/src/plugins/fts-squat/squat-test.c Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/squat-test.c Sun Feb 03 22:44:09 2008 +0200
@@ -129,7 +129,7 @@ int main(int argc ATTR_UNUSED, char *arg
}
}
buffer_free(&valid);
- if (squat_trie_build_deinit(&build_ctx) < 0)
+ if (squat_trie_build_deinit(&build_ctx, NULL) < 0)
ret = -1;
if (ret < 0) {
printf("build broken\n");
diff -r d7d885b6dd46 -r f5f77a3ae203 src/plugins/fts-squat/squat-trie.c
--- a/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:42:46 2008 +0200
+++ b/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:44:09 2008 +0200
@@ -43,6 +43,7 @@ struct squat_trie_build_context {
struct squat_trie_iterate_node {
struct squat_node *node;
+ ARRAY_TYPE(seq_range) shifts;
unsigned int idx;
};
@@ -1013,8 +1014,16 @@ static int
static int
squat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx)
{
+ struct squat_trie_iterate_node *nodes;
+ unsigned int i, count;
int ret = ctx->failed ? -1 : 0;
+ if (array_is_created(&ctx->cur.shifts)) {
+ nodes = array_get_modifiable(&ctx->parents, &count);
+ for (i = 0; i < count; i++)
+ array_free(&nodes[i].shifts);
+ array_free(&ctx->cur.shifts);
+ }
array_free(&ctx->parents);
i_free(ctx);
return ret;
@@ -1033,70 +1042,253 @@ squat_trie_iterate_first(struct squat_tr
}
static struct squat_node *
-squat_trie_iterate_next(struct squat_trie_iterate_context *ctx)
+squat_trie_iterate_next(struct squat_trie_iterate_context *ctx,
+ ARRAY_TYPE(seq_range) *shifts_r)
{
struct squat_trie_iterate_node *iter_nodes;
struct squat_node *children;
- unsigned int count;
-
- while (ctx->cur.idx == ctx->cur.node->child_count) {
+ unsigned int count, shift_count = 0;
+
+ while (ctx->cur.idx == ctx->cur.node->child_count ||
+ ctx->cur.node->uid_list_idx == 0)
+ {
iter_nodes = array_get_modifiable(&ctx->parents, &count);
if (count == 0)
return NULL;
+
+ if (array_is_created(&ctx->cur.shifts))
+ array_free(&ctx->cur.shifts);
ctx->cur = iter_nodes[count-1];
array_delete(&ctx->parents, count-1, 1);
}
- ctx->cur.idx++;
+ *shifts_r = ctx->cur.shifts;
+ if (array_is_created(&ctx->cur.shifts)) {
+ shift_count = array_count(&ctx->cur.shifts);
+ i_assert(shift_count > 0);
+ }
+
+ children = NODE_CHILDREN_NODES(ctx->cur.node);
+ while (children[ctx->cur.idx++].uid_list_idx == 0) {
+ if (ctx->cur.idx == ctx->cur.node->child_count) {
+ /* no more non-empty children in this node */
+ return squat_trie_iterate_next(ctx, shifts_r);
+ }
+ }
array_append(&ctx->parents, &ctx->cur, 1);
- children = NODE_CHILDREN_NODES(ctx->cur.node);
ctx->cur.node = &children[ctx->cur.idx-1];
ctx->cur.idx = 0;
+ if (shift_count != 0)
+ i_array_init(&ctx->cur.shifts, shift_count);
return ctx->cur.node;
}
+static void
+squat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr,
+ ARRAY_TYPE(seq_range) *child_shifts,
+ ARRAY_TYPE(seq_range) *uids_arr,
+ bool do_shifts)
+{
+ const struct seq_range *shifts;
+ struct seq_range *uids, shift;
+ unsigned int i, uid_idx, uid_count, shift_count;
+ uint32_t child_shift_seq1, child_shift_count, seq_high;
+ unsigned int shift_sum = 0, child_sum = 0;
+
+ /* self shift */
+ uids = array_get_modifiable(uids_arr, &uid_count);
+ shifts = array_get(shifts_arr, &shift_count);
+ for (i = 0, uid_idx = 0, seq_high = 0;; ) {
+ /* skip UID ranges until we skip/overlap shifts */
+ while (uid_idx < uid_count &&
+ (i == shift_count ||
+ I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2))
+ {
+ i_assert(uids[uid_idx].seq1 >= shift_sum);
+ uids[uid_idx].seq1 -= shift_sum;
+ uids[uid_idx].seq2 -= shift_sum;
+ child_sum += uids[uid_idx].seq2 -
+ uids[uid_idx].seq1 + 1;
+
+ if (uid_idx > 0 &&
+ uids[uid_idx-1].seq2 == uids[uid_idx].seq1 - 1) {
+ /* we can merge this and the previous range */
+ uids[uid_idx-1].seq2 = uids[uid_idx].seq2;
+ array_delete(uids_arr, uid_idx, 1);
+ uids = array_get_modifiable(uids_arr,
+ &uid_count);
+ } else {
+ uid_idx++;
+ }
+ }
+ if (uid_idx == uid_count)
+ break;
+
+ shift.seq1 = I_MAX(shifts[i].seq1, seq_high);
+ shift.seq2 = shifts[i].seq2;
+ if (shift.seq2 < uids[uid_idx].seq1) {
+ /* shift is entirely before UID range */
+ shift_sum += shift.seq2 - shift.seq1 + 1;
+ i++;
+ } else {
+ /* handle shifts before UID range */
+ if (shift.seq1 < uids[uid_idx].seq1) {
+ shift_sum += uids[uid_idx].seq1 - shift.seq1;
+ shift.seq1 = uids[uid_idx].seq1;
+ }
+ /* update child shifts */
+ child_shift_seq1 = child_sum +
+ shift.seq1 - uids[uid_idx].seq1;
+ child_shift_count =
+ I_MIN(shift.seq2, uids[uid_idx].seq2) -
+ shift.seq1 + 1;
+ seq_range_array_add_range(child_shifts,
+ child_shift_seq1,
+ child_shift_seq1 +
+ child_shift_count - 1);
+ child_sum += child_shift_count;
+
+ /* if the shifts continue after the UID range,
+ treat it in the next loop iteration */
+ if (shift.seq2 <= uids[uid_idx].seq2)
+ i++;
+ else
+ seq_high = uids[uid_idx].seq2 + 1;
+
+ /* update UIDs - no matter where within the UID range
+ the shifts happened, the result is the same:
+ shift number of UIDs are removed, and the rest
+ are decreased by shift_sum.
+
+ 123 uids child_shifts
+ a) s -> 12 1
+ b) s -> 12 2
+ c) s -> 12 3
+ */
+ if (uids[uid_idx].seq1 +
+ child_shift_count > uids[uid_idx].seq2) {
+ /* removed completely */
+ array_delete(uids_arr, uid_idx, 1);
+ uids = array_get_modifiable(uids_arr,
+ &uid_count);
+ } else {
+ /* the next loop iteration fixes the UIDs */
+ uids[uid_idx].seq1 += child_shift_count;
+ }
+ shift_sum += child_shift_count;
+ }
+ if (!do_shifts) {
+ /* root node - UIDs are only removed, not shifted */
+ shift_sum = 0;
+ }
+ }
+}
+
static int
-squat_trie_renumber_uidlists(struct squat_trie_build_context *ctx,
- bool compress)
-{
- struct squat_trie_iterate_context *iter;
+squat_trie_expunge_uidlists(struct squat_trie_build_context *ctx,
+ struct squat_uidlist_rebuild_context *rebuild_ctx,
+ struct squat_trie_iterate_context *iter,
+ const ARRAY_TYPE(seq_range) *expunged_uids)
+{
struct squat_node *node;
- struct squat_uidlist_rebuild_context *rebuild_ctx;
+ ARRAY_TYPE(seq_range) uid_range, shifts;
+ bool shift = FALSE;
+ int ret = 0;
+
+ node = squat_trie_iterate_first(iter);
+ if (node->uid_list_idx == 0)
+ return 0;
+
+ i_array_init(&uid_range, 1024);
+ i_array_init(&shifts, array_count(expunged_uids));
+ array_append_array(&shifts, expunged_uids);
+
+ while (node != NULL) {
+ i_assert(node->uid_list_idx != 0);
+ array_clear(&uid_range);
+ if (squat_uidlist_get_seqrange(ctx->trie->uidlist,
+ node->uid_list_idx,
+ &uid_range) < 0) {
+ ret = -1;
+ break;
+ }
+ squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts,
+ &uid_range, shift);
+ node->uid_list_idx =
+ squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range);
+ if (node->uid_list_idx == 0) {
+ node->child_count = 0;
+ node->next_uid = 0;
+ }
+ node = squat_trie_iterate_next(iter, &shifts);
+ i_assert(array_count(&shifts) > 0);
+ shift = TRUE;
+ }
+ array_free(&uid_range);
+ return ret;
+}
+
+static int
+squat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx,
+ struct squat_uidlist_rebuild_context *rebuild_ctx,
+ struct squat_trie_iterate_context *iter)
+{
+ struct squat_node *node;
+ ARRAY_TYPE(seq_range) shifts;
ARRAY_TYPE(uint32_t) uids;
- uint32_t new_uid_list_idx, max_count=0;
More information about the dovecot-cvs
mailing list