[dovecot-cvs] dovecot/src/plugins/fts fts-storage.c,1.13,1.14
tss at dovecot.org
tss at dovecot.org
Thu Dec 21 13:16:35 UTC 2006
Update of /var/lib/cvs/dovecot/src/plugins/fts
In directory talvi:/tmp/cvs-serv24065
Modified Files:
fts-storage.c
Log Message:
Make indexing non-blocking. Optimize also header searches.
Index: fts-storage.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts/fts-storage.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- fts-storage.c 20 Dec 2006 21:26:37 -0000 1.13
+++ fts-storage.c 21 Dec 2006 13:16:32 -0000 1.14
@@ -17,6 +17,8 @@
*((void **)array_idx_modifiable(&(obj)->module_contexts, \
fts_storage_module_id))
+#define FTS_SEARCH_NONBLOCK_COUNT 10
+
struct fts_mailbox {
struct mailbox_vfuncs super;
struct fts_backend *backend_exact;
@@ -30,10 +32,25 @@
ARRAY_TYPE(seq_range) result;
unsigned int result_pos;
+ struct mail_search_arg *args, *best_arg;
struct fts_backend *backend;
+ struct fts_storage_build_context *build_ctx;
+
unsigned int locked:1;
};
+struct fts_storage_build_context {
+ struct mail_search_context *search_ctx;
+ struct mail_search_seqset seqset;
+ struct mail_search_arg search_arg;
+ struct mail *mail;
+ struct fts_backend_build_context *build;
+
+ uint32_t uid;
+ string_t *headers;
+ bool save_part;
+};
+
struct fts_transaction_context {
bool expunges;
};
@@ -83,13 +100,6 @@
return 0;
}
-struct fts_storage_build_context {
- struct fts_backend_build_context *build;
- uint32_t uid;
- string_t *headers;
- bool save_part;
-};
-
static int fts_build_mail_flush(struct fts_storage_build_context *ctx)
{
if (str_len(ctx->headers) == 0)
@@ -140,8 +150,7 @@
return fts_build_mail_flush(ctx);
}
-static int
-fts_build_mail(struct fts_storage_build_context *ctx, struct mail *mail)
+static int fts_build_mail(struct fts_storage_build_context *ctx)
{
struct istream *input;
struct message_parser_ctx *parser;
@@ -150,9 +159,9 @@
struct message_part *prev_part, *skip_part;
int ret;
- ctx->uid = mail->uid;
+ ctx->uid = ctx->mail->uid;
- input = mail_get_stream(mail, NULL, NULL);
+ input = mail_get_stream(ctx->mail, NULL, NULL);
if (input == NULL)
return -1;
@@ -196,7 +205,7 @@
break;
}
} else {
- if (fts_backend_build_more(ctx->build, mail->uid,
+ if (fts_backend_build_more(ctx->build, ctx->mail->uid,
block.data,
block.size) < 0) {
ret = -1;
@@ -209,16 +218,14 @@
return ret;
}
-static int fts_build_new(struct fts_backend *backend,
- struct mailbox_transaction_context *t)
+static int fts_build_init(struct fts_search_context *fctx,
+ struct mailbox_transaction_context *t)
{
- struct fts_storage_build_context ctx;
- struct mail_search_context *search_ctx;
+ struct fts_backend *backend = fctx->backend;
+ struct fts_storage_build_context *ctx;
+ struct fts_backend_build_context *build;
struct mail_search_seqset seqset;
- struct mail_search_arg search_arg;
- struct mail *mail;
uint32_t last_uid, last_uid_locked;
- int ret = 0;
if (fts_backend_get_last_uid(backend, &last_uid) < 0)
return -1;
@@ -232,8 +239,7 @@
return 0;
}
- memset(&ctx, 0, sizeof(ctx));
- ctx.build = fts_backend_build_init(backend, &last_uid_locked);
+ build = fts_backend_build_init(backend, &last_uid_locked);
if (last_uid != last_uid_locked) {
/* changed, need to get again the sequences */
i_assert(last_uid < last_uid_locked);
@@ -241,138 +247,292 @@
last_uid = last_uid_locked;
if (mailbox_get_uids(t->box, last_uid+1, (uint32_t)-1,
&seqset.seq1, &seqset.seq2) < 0) {
- (void)fts_backend_build_deinit(ctx.build);
+ (void)fts_backend_build_deinit(build);
return -1;
}
if (seqset.seq1 == 0) {
/* no new messages */
- (void)fts_backend_build_deinit(ctx.build);
+ (void)fts_backend_build_deinit(build);
return 0;
}
}
- memset(&search_arg, 0, sizeof(search_arg));
- search_arg.type = SEARCH_SEQSET;
- search_arg.value.seqset = &seqset;
- ctx.headers = str_new(default_pool, 512);
- mail = mail_alloc(t, 0, NULL);
- search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
- while (mailbox_search_next(search_ctx, mail) > 0) {
- t_push();
- ret = fts_build_mail(&ctx, mail);
- t_pop();
+ ctx = i_new(struct fts_storage_build_context, 1);
+ ctx->build = build;
+ ctx->seqset = seqset;
+ ctx->search_arg.type = SEARCH_SEQSET;
+ ctx->search_arg.value.seqset = &ctx->seqset;
- if (ret < 0)
- break;
- }
- if (mailbox_search_deinit(&search_ctx) < 0)
+ ctx->headers = str_new(default_pool, 512);
+ ctx->mail = mail_alloc(t, 0, NULL);
+ ctx->search_ctx = mailbox_search_init(t, NULL, &ctx->search_arg, NULL);
+
+ fctx->build_ctx = ctx;
+ return 0;
+}
+
+static int fts_build_deinit(struct fts_storage_build_context *ctx)
+{
+ int ret = 0;
+
+ if (mailbox_search_deinit(&ctx->search_ctx) < 0)
ret = -1;
- mail_free(&mail);
+ mail_free(&ctx->mail);
- if (fts_backend_build_deinit(ctx.build) < 0)
+ if (fts_backend_build_deinit(ctx->build) < 0)
ret = -1;
- str_free(&ctx.headers);
+ str_free(&ctx->headers);
+ i_free(ctx);
return ret;
}
-#define SEARCH_ARG_IS_OPTIMIZABLE(backend, arg) \
- ((arg)->type == SEARCH_BODY_FAST || (arg)->type == SEARCH_TEXT_FAST || \
- ((backend) != NULL && \
- ((backend)->flags & FTS_BACKEND_FLAG_EXACT_LOOKUPS) != 0 && \
- ((arg)->type == SEARCH_BODY || (arg)->type == SEARCH_TEXT)))
+static int fts_build_more(struct fts_storage_build_context *ctx)
+{
+ unsigned int count = 0;
+ int ret;
+ while (mailbox_search_next(ctx->search_ctx, ctx->mail) > 0) {
+ t_push();
+ ret = fts_build_mail(ctx);
+ t_pop();
-static struct mail_search_context *
-fts_mailbox_search_init(struct mailbox_transaction_context *t,
- const char *charset, struct mail_search_arg *args,
- const enum mail_sort_type *sort_program)
-{
- struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
- struct mail_search_context *ctx;
- struct fts_search_context *fctx;
- struct fts_backend *backend = NULL;
- struct mail_search_arg *tmp;
- ARRAY_TYPE(seq_range) uid_result;
+ if (ret < 0)
+ return -1;
- ctx = fbox->super.search_init(t, charset, args, sort_program);
+ if (++count == FTS_SEARCH_NONBLOCK_COUNT)
+ return 0;
+ }
+ return 1;
+}
- fctx = i_new(struct fts_search_context, 1);
- array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx);
+static void fts_search_filter_args(struct fts_search_context *fctx,
+ struct mail_search_arg *args,
+ ARRAY_TYPE(seq_range) *uid_result)
+{
+ const char *key;
- if (fbox->backend_exact == NULL && fbox->backend_fast == NULL)
- return ctx;
+ for (; args != NULL; args = args->next) {
+ switch (args->type) {
+ case SEARCH_BODY_FAST:
+ case SEARCH_TEXT_FAST:
+ if ((fctx->backend->flags &
+ FTS_BACKEND_FLAG_EXACT_LOOKUPS) == 0)
+ break;
+ /* fall through */
+ case SEARCH_BODY:
+ case SEARCH_TEXT:
+ case SEARCH_HEADER:
+ if (args == fctx->best_arg) {
+ /* already handled this one */
+ break;
+ }
- /* FIXME: handle AND/OR. Maybe also header lookups? */
+ key = args->value.str;
+ if (*key == '\0') {
+ i_assert(args->type == SEARCH_HEADER);
- /* first check if we can use backend_fast */
- for (tmp = args; tmp != NULL; tmp = tmp->next) {
- if (args->type == SEARCH_BODY_FAST ||
- args->type == SEARCH_TEXT_FAST)
- break;
- }
- if (tmp != NULL) {
- /* yes */
- backend = fbox->backend_fast;
- } else if (fbox->backend_exact != NULL) {
- /* nope. what about backend_exact? */
- for (tmp = args; tmp != NULL; tmp = tmp->next) {
- if (args->type == SEARCH_BODY ||
- args->type == SEARCH_TEXT) {
- backend = fbox->backend_exact;
+ /* we're only checking the existence
+ of the header. */
+ key = args->hdr_field_name;
+ }
+
+ if (fts_backend_filter(fctx->backend, key,
+ uid_result) < 0) {
+ /* failed, but we already have limited
+ the search, so just ignore this */
break;
}
+ if (args->type != SEARCH_HEADER &&
+ (fctx->backend->flags &
+ FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
+ args->match_always = TRUE;
+ args->result = 1;
+ }
+ break;
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ fts_search_filter_args(fctx, args->value.subargs,
+ uid_result);
+ break;
+ default:
+ break;
}
}
- args = tmp;
-
- if (backend == NULL)
- return ctx;
+}
- /* update the backend */
- if (fts_build_new(backend, t) < 0)
- return ctx;
+static void fts_search_init(struct mailbox *box,
+ struct fts_search_context *fctx)
+{
+ struct fts_backend *backend = fctx->backend;
+ const char *key;
+ ARRAY_TYPE(seq_range) uid_result;
if (fts_backend_lock(backend) <= 0)
- return ctx;
- fctx->backend = backend;
+ return;
fctx->locked = TRUE;
+ key = fctx->best_arg->value.str;
+ if (*key == '\0') {
+ i_assert(fctx->best_arg->type == SEARCH_HEADER);
+
+ /* we're only checking the existence
+ of the header. */
+ key = fctx->best_arg->hdr_field_name;
+ }
+
i_array_init(&uid_result, 64);
- if (fts_backend_lookup(backend, args->value.str, &uid_result) < 0) {
+ if (fts_backend_lookup(backend, key, &uid_result) < 0) {
/* failed, fallback to reading everything */
array_free(&uid_result);
+ return;
}
if ((backend->flags & FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
- args->match_always = TRUE;
- args->result = 1;
+ fctx->best_arg->match_always = TRUE;
+ fctx->best_arg->result = 1;
}
- args = args->next;
- while (args != NULL) {
- if (SEARCH_ARG_IS_OPTIMIZABLE(backend, args)) {
- if ((backend->flags &
- FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
+
+ fts_search_filter_args(fctx, fctx->args, &uid_result);
+
+ (void)uid_range_to_seq(box, &uid_result, &fctx->result);
+ array_free(&uid_result);
+}
+
+static bool arg_is_better(const struct mail_search_arg *new_arg,
+ const struct mail_search_arg *old_arg)
+{
+ if (old_arg == NULL)
+ return TRUE;
+
+ /* prefer not to use headers. they have a larger possibility of
+ having lots of identical strings */
+ if (old_arg->type == SEARCH_HEADER)
+ return TRUE;
+ else if (new_arg->type == SEARCH_HEADER)
+ return FALSE;
+
+ return strlen(new_arg->value.str) > strlen(old_arg->value.str);
+}
+
+static void fts_search_args_check(struct mail_search_arg *args,
+ bool *have_fast_r, bool *have_exact_r,
+ struct mail_search_arg **best_fast_arg,
+ struct mail_search_arg **best_exact_arg)
+{
+ for (; args != NULL; args = args->next) {
+ switch (args->type) {
+ case SEARCH_BODY_FAST:
+ case SEARCH_TEXT_FAST:
+ if (*args->value.str == '\0') {
+ /* this matches everything */
args->match_always = TRUE;
args->result = 1;
+ break;
}
- if (fts_backend_filter(backend, args->value.str,
- &uid_result) < 0) {
- /* failed, but we already have limited
- the search, so just ignore this */
+ if (arg_is_better(args, *best_fast_arg)) {
+ *best_fast_arg = args;
+ *have_fast_r = TRUE;
+ }
+ break;
+ case SEARCH_BODY:
+ case SEARCH_TEXT:
+ if (*args->value.str == '\0') {
+ /* this matches everything */
+ args->match_always = TRUE;
+ args->result = 1;
break;
}
+ case SEARCH_HEADER:
+ if (arg_is_better(args, *best_exact_arg)) {
+ *best_exact_arg = args;
+ *have_exact_r = TRUE;
+ }
+ break;
+ case SEARCH_OR:
+ case SEARCH_SUB:
+ fts_search_args_check(args->value.subargs,
+ have_fast_r, have_exact_r,
+ best_fast_arg, best_exact_arg);
+ break;
+ default:
+ break;
}
- args = args->next;
}
+}
- if (array_is_created(&uid_result)) {
- (void)uid_range_to_seq(t->box, &uid_result, &fctx->result);
- array_free(&uid_result);
+static struct mail_search_context *
+fts_mailbox_search_init(struct mailbox_transaction_context *t,
+ const char *charset, struct mail_search_arg *args,
+ const enum mail_sort_type *sort_program)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
+ struct mail_search_context *ctx;
+ struct fts_search_context *fctx;
+ struct mail_search_arg *best_fast_arg, *best_exact_arg;
+ bool have_fast, have_exact;
+
+ ctx = fbox->super.search_init(t, charset, args, sort_program);
+
+ fctx = i_new(struct fts_search_context, 1);
+ fctx->args = args;
+ array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx);
+
+ if (fbox->backend_exact == NULL && fbox->backend_fast == NULL)
+ return ctx;
+
+ have_fast = have_exact = FALSE;
+ best_fast_arg = best_exact_arg = NULL;
+ fts_search_args_check(args, &have_fast, &have_exact,
+ &best_fast_arg, &best_exact_arg);
+ if (have_fast && fbox->backend_fast != NULL) {
+ /* use fast backend whenever possible */
+ fctx->backend = fbox->backend_fast;
+ fctx->best_arg = best_fast_arg;
+ } else if (have_exact || have_fast) {
+ fctx->backend = fbox->backend_exact;
+ fctx->best_arg = arg_is_better(best_exact_arg, best_fast_arg) ?
+ best_exact_arg : best_fast_arg;
}
+
+ if (fctx->backend != NULL) {
+ if (fts_build_init(fctx, t) < 0)
+ fctx->backend = NULL;
+ else if (fctx->build_ctx == NULL) {
+ /* the index was up to date */
+ fts_search_init(t->box, fctx);
+ }
+ }
+
return ctx;
}
+static int fts_mailbox_search_next_nonblock(struct mail_search_context *ctx,
+ struct mail *mail, bool *tryagain_r)
+{
+ struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
+ struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+ int ret;
+
+ if (fctx->build_ctx != NULL) {
+ /* still building the index */
+ ret = fts_build_more(fctx->build_ctx);
+ if (ret == 0) {
+ *tryagain_r = TRUE;
+ return 0;
+ }
+
+ /* finished / error */
+ fts_build_deinit(fctx->build_ctx);
+ fctx->build_ctx = NULL;
+
+ if (ret == 0)
+ fts_search_init(ctx->transaction->box, fctx);
+ }
+ return fbox->super.search_next_nonblock(ctx, mail, tryagain_r);
+
+}
+
static int fts_mailbox_search_next_update_seq(struct mail_search_context *ctx)
{
struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
@@ -418,6 +578,11 @@
struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+ if (fctx->build_ctx != NULL) {
+ /* the search was cancelled */
+ fts_build_deinit(fctx->build_ctx);
+ }
+
if (fctx->locked)
fts_backend_unlock(fctx->backend);
@@ -574,6 +739,7 @@
fbox->super = box->v;
box->v.close = fts_mailbox_close;
box->v.search_init = fts_mailbox_search_init;
+ box->v.search_next_nonblock = fts_mailbox_search_next_nonblock;
box->v.search_next_update_seq = fts_mailbox_search_next_update_seq;
box->v.search_deinit = fts_mailbox_search_deinit;
box->v.mail_alloc = fts_mail_alloc;
More information about the dovecot-cvs
mailing list