dovecot-1.2: Finished making full text search indexes work fast ...
dovecot at dovecot.org
dovecot at dovecot.org
Sun Nov 30 01:27:25 EET 2008
details: http://hg.dovecot.org/dovecot-1.2/rev/ffb37c392166
changeset: 8495:ffb37c392166
user: Timo Sirainen <tss at iki.fi>
date: Sat Nov 29 22:39:44 2008 +0200
description:
Finished making full text search indexes work fast with virtual mailboxes.
diffstat:
21 files changed, 486 insertions(+), 40 deletions(-)
src/lib-storage/index/cydir/cydir-storage.c | 2
src/lib-storage/index/dbox/dbox-storage.c | 2
src/lib-storage/index/maildir/maildir-storage.c | 2
src/lib-storage/index/mbox/mbox-storage.c | 2
src/lib-storage/index/raw/raw-storage.c | 2
src/lib-storage/mail-storage-private.h | 6
src/lib-storage/mail-storage.c | 23 ++
src/lib-storage/mail-storage.h | 13 +
src/plugins/fts-lucene/fts-backend-lucene.c | 1
src/plugins/fts-solr/fts-backend-solr.c | 138 ++++++++++++-
src/plugins/fts-solr/solr-connection.c | 10
src/plugins/fts-solr/solr-connection.h | 2
src/plugins/fts-squat/fts-backend-squat.c | 1
src/plugins/fts/fts-api-private.h | 2
src/plugins/fts/fts-api.c | 6
src/plugins/fts/fts-api.h | 15 +
src/plugins/fts/fts-storage.c | 231 ++++++++++++++++++++---
src/plugins/fts/fts-storage.h | 11 +
src/plugins/virtual/virtual-config.c | 25 ++
src/plugins/virtual/virtual-storage.c | 29 ++
src/plugins/virtual/virtual-storage.h | 3
diffs (truncated from 906 to 300 lines):
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/cydir/cydir-storage.c
--- a/src/lib-storage/index/cydir/cydir-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/cydir/cydir-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -447,6 +447,8 @@ struct mailbox cydir_mailbox = {
index_storage_get_uid_range,
index_storage_get_expunged_uids,
NULL,
+ NULL,
+ NULL,
index_mail_alloc,
index_header_lookup_init,
index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/dbox/dbox-storage.c
--- a/src/lib-storage/index/dbox/dbox-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/dbox/dbox-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -718,6 +718,8 @@ struct mailbox dbox_mailbox = {
index_storage_get_uid_range,
index_storage_get_expunged_uids,
NULL,
+ NULL,
+ NULL,
dbox_mail_alloc,
index_header_lookup_init,
index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/maildir/maildir-storage.c
--- a/src/lib-storage/index/maildir/maildir-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/maildir/maildir-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -1106,6 +1106,8 @@ struct mailbox maildir_mailbox = {
index_storage_get_uid_range,
index_storage_get_expunged_uids,
NULL,
+ NULL,
+ NULL,
index_mail_alloc,
index_header_lookup_init,
index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/mbox/mbox-storage.c
--- a/src/lib-storage/index/mbox/mbox-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/mbox/mbox-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -1028,6 +1028,8 @@ struct mailbox mbox_mailbox = {
index_storage_get_uid_range,
index_storage_get_expunged_uids,
NULL,
+ NULL,
+ NULL,
index_mail_alloc,
index_header_lookup_init,
index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/raw/raw-storage.c
--- a/src/lib-storage/index/raw/raw-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/raw/raw-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -297,6 +297,8 @@ struct mailbox raw_mailbox = {
index_storage_get_uid_range,
index_storage_get_expunged_uids,
NULL,
+ NULL,
+ NULL,
index_mail_alloc,
index_header_lookup_init,
index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage-private.h
--- a/src/lib-storage/mail-storage-private.h Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage-private.h Sat Nov 29 22:39:44 2008 +0200
@@ -142,6 +142,12 @@ struct mailbox_vfuncs {
const char *backend_mailbox,
uint32_t backend_uidvalidity,
uint32_t backend_uid, uint32_t *uid_r);
+ void (*get_virtual_backend_boxes)(struct mailbox *box,
+ ARRAY_TYPE(mailboxes) *mailboxes,
+ bool only_with_msgs);
+ void (*get_virtual_box_patterns)(struct mailbox *box,
+ ARRAY_TYPE(const_string) *includes,
+ ARRAY_TYPE(const_string) *excludes);
struct mail *
(*mail_alloc)(struct mailbox_transaction_context *t,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage.c
--- a/src/lib-storage/mail-storage.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage.c Sat Nov 29 22:39:44 2008 +0200
@@ -640,6 +640,29 @@ bool mailbox_get_virtual_uid(struct mail
backend_uid, uid_r);
}
+void mailbox_get_virtual_backend_boxes(struct mailbox *box,
+ ARRAY_TYPE(mailboxes) *mailboxes,
+ bool only_with_msgs)
+{ /* Collect box's backend mailboxes into the caller-supplied array. */
+ if (box->v.get_virtual_backend_boxes == NULL)
+ array_append(mailboxes, &box, 1); /* no vfunc set: the box itself is the only entry; only_with_msgs is not consulted */
+ else
+ box->v.get_virtual_backend_boxes(box, mailboxes, only_with_msgs); /* otherwise the storage's own implementation fills the array */
+}
+/* Collect the mailbox name patterns for box into the caller-supplied includes/excludes arrays. */
+void mailbox_get_virtual_box_patterns(struct mailbox *box,
+ ARRAY_TYPE(const_string) *includes,
+ ARRAY_TYPE(const_string) *excludes)
+{
+ if (box->v.get_virtual_box_patterns == NULL) {
+ const char *name = box->name;
+/* No vfunc set: the box's own name is the single include; excludes is left untouched. */
+ array_append(includes, &name, 1);
+ } else {
+ box->v.get_virtual_box_patterns(box, includes, excludes); /* otherwise the storage's own implementation fills both arrays */
+ }
+}
+
struct mailbox_header_lookup_ctx *
mailbox_header_lookup_init(struct mailbox *box, const char *const headers[])
{
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage.h
--- a/src/lib-storage/mail-storage.h Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage.h Sat Nov 29 22:39:44 2008 +0200
@@ -234,6 +234,8 @@ struct mail_storage_callbacks {
};
+ARRAY_DEFINE_TYPE(mailboxes, struct mailbox *);
+
typedef void mailbox_notify_callback_t(struct mailbox *box, void *context);
void mail_storage_init(void);
@@ -421,6 +423,17 @@ bool mailbox_get_virtual_uid(struct mail
bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
uint32_t backend_uidvalidity,
uint32_t backend_uid, uint32_t *uid_r);
+/* If box is a virtual mailbox, return all backend mailboxes. If
+ only_with_msgs=TRUE, return only those mailboxes that have at least one
+ message existing in the virtual mailbox. */
+void mailbox_get_virtual_backend_boxes(struct mailbox *box,
+ ARRAY_TYPE(mailboxes) *mailboxes,
+ bool only_with_msgs);
+/* If mailbox is a virtual mailbox, return all mailbox list patterns that
+ are used to figure out which mailboxes belong to the virtual mailbox. */
+void mailbox_get_virtual_box_patterns(struct mailbox *box,
+ ARRAY_TYPE(const_string) *includes,
+ ARRAY_TYPE(const_string) *excludes);
/* Initialize header lookup for given headers. */
struct mailbox_header_lookup_ctx *
diff -r 57e704c361a7 -r ffb37c392166 src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c Sat Nov 29 22:39:44 2008 +0200
@@ -210,6 +210,7 @@ struct fts_backend fts_backend_lucene =
fts_backend_lucene_init,
fts_backend_lucene_deinit,
fts_backend_lucene_get_last_uid,
+ NULL,
fts_backend_lucene_build_init,
fts_backend_lucene_build_more,
fts_backend_lucene_build_deinit,
diff -r 57e704c361a7 -r ffb37c392166 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c Sat Nov 29 22:39:04 2008 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c Sat Nov 29 22:39:44 2008 +0200
@@ -3,12 +3,13 @@
#include "lib.h"
#include "array.h"
#include "str.h"
+#include "strescape.h"
#include "mail-storage-private.h"
#include "mail-namespace.h"
#include "solr-connection.h"
#include "fts-solr-plugin.h"
-#include <curl/curl.h>
+#include <ctype.h>
#define SOLR_CMDBUF_SIZE (1024*64)
#define SOLR_MAX_ROWS 100000
@@ -25,6 +26,11 @@ struct solr_fts_backend_build_context {
uint32_t prev_uid, uid_validity;
string_t *cmd;
bool headers;
+};
+/* State handed to solr_virtual_get_last_uids() through its void *context argument. */
+struct fts_backend_solr_get_last_uids_context {
+ pool_t pool; /* mailbox names stored in the result entries are p_strdup()ed from this pool */
+ ARRAY_TYPE(fts_backend_uid_map) *last_uids; /* result array; one entry is appended per callback invocation */
};
static struct solr_connection *solr_conn = NULL;
@@ -199,6 +205,120 @@ static int fts_backend_solr_get_last_uid
return 0;
}
+static bool
+solr_virtual_get_last_uids(const char *mailbox, uint32_t uidvalidity,
+ uint32_t *uid, void *context)
+{
+ struct fts_backend_solr_get_last_uids_context *ctx = context;
+ struct fts_backend_uid_map *map;
+/* Record this (mailbox, uidvalidity, uid) triple as a new entry in ctx->last_uids. */
+ map = array_append_space(ctx->last_uids);
+ map->mailbox = p_strdup(ctx->pool, mailbox); /* copy the name into ctx->pool rather than keeping the callback's pointer */
+ map->uidvalidity = uidvalidity;
+ map->uid = *uid;
+ return FALSE; /* NOTE(review): what FALSE means is defined by solr_connection_select()'s callback contract, not visible here */
+}
+/* Append pattern to str: double-quoted and str_escape()d if it has no wildcards, otherwise as a wildcard expression using '*'. */
+static void add_pattern_as_solr(string_t *str, const char *pattern)
+{
+ const char *p;
+
+ /* first check if there are any wildcards in the pattern */
+ for (p = pattern; *p != '\0'; p++) {
+ if (*p == '%' || *p == '*')
+ break;
+ }
+ if (*p == '\0') { /* scanned to the end without finding '%' or '*' */
+ /* full mailbox name */
+ str_append_c(str, '"');
+ str_append(str, str_escape(pattern));
+ str_append_c(str, '"');
+ return;
+ }
+
+ /* there are at least some wildcards. */
+ for (p = pattern; *p != '\0'; p++) {
+ if (*p == '%' || *p == '*') { /* both wildcard characters are written as '*' */
+ if (p == pattern || (p[-1] != '%' && p[-1] != '*')) /* emit only one '*' per run of consecutive wildcards */
+ str_append_c(str, '*');
+ } else {
+ if (!i_isalnum(*p)) /* backslash-escape every non-alphanumeric character */
+ str_append_c(str, '\\');
+ str_append_c(str, *p);
+ }
+ }
+}
+/* Build a filter from box's include/exclude mailbox patterns and append it to str as an http-escaped "&fq=" parameter; nothing is appended if the filter ends up empty. */
+static void
+fts_backend_solr_filter_mailboxes(struct solr_connection *solr_conn, /* NOTE(review): this parameter shadows the file-scope static solr_conn */
+ string_t *str, struct mailbox *box)
+{
+ ARRAY_TYPE(const_string) includes_arr, excludes_arr;
+ const char *const *includes, *const *excludes;
+ unsigned int i, inc_count, exc_count;
+ string_t *fq;
+
+ t_array_init(&includes_arr, 16);
+ t_array_init(&excludes_arr, 16);
+ mailbox_get_virtual_box_patterns(box, &includes_arr, &excludes_arr);
+ includes = array_get(&includes_arr, &inc_count);
+ excludes = array_get(&excludes_arr, &exc_count);
+ i_assert(inc_count > 0);
+
+ /* First see if there are any patterns that begin with a wildcard.
+ Solr doesn't allow them, so in that case we'll need to return
+ all mailboxes. */
+ for (i = 0; i < inc_count; i++) {
+ if (*includes[i] == '*' || *includes[i] == '%')
+ break;
+ }
+/* After the loop, i == inc_count means no include pattern starts with a wildcard. */
+ fq = t_str_new(128);
+ if (i == inc_count) {
+ /* we can filter what mailboxes we want returned */
+ str_append_c(fq, '('); /* result has the form (box:A OR box:B ...) */
+ for (i = 0; i < inc_count; i++) {
+ if (i != 0)
+ str_append(fq, " OR ");
+ str_append(fq, "box:");
+ add_pattern_as_solr(fq, includes[i]);
+ }
+ str_append_c(fq, ')');
+ }
+ for (i = 0; i < exc_count; i++) { /* NOTE(review): excludes are not checked for a leading wildcard the way includes are - confirm this is intended */
+ if (str_len(fq) > 0)
+ str_append_c(fq, ' ');
+ str_append(fq, "-box:");
+ add_pattern_as_solr(fq, excludes[i]);
+ }
+ if (str_len(fq) > 0) { /* fq is empty when includes could not be filtered and there are no excludes */
+ str_append(str, "&fq=");
+ solr_connection_http_escape(solr_conn, str, str_c(fq));
+ }
+}
+/* Run a Solr select for the user's last_uid:TRUE entries, filtered by backend->box's mailbox patterns; each result is appended to last_uids with its mailbox name allocated from pool. */
+static int
+fts_backend_solr_get_all_last_uids(struct fts_backend *backend, pool_t pool,
+ ARRAY_TYPE(fts_backend_uid_map) *last_uids)
+{
+ struct fts_backend_solr_get_last_uids_context ctx;
+ string_t *str;
+
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.pool = pool;
+ ctx.last_uids = last_uids;
+
+ str = t_str_new(256);
+ str_printfa(str, "fl=uid,box,uidv&rows=%u&q=last_uid:TRUE%%20user:", /* the doubled percent sign yields a literal %20 (URL-encoded space) between the two query terms */
+ SOLR_MAX_ROWS);
+ solr_quote_str(str, backend->box->storage->ns->user->username);
+ fts_backend_solr_filter_mailboxes(solr_conn, str, backend->box); /* solr_conn here is the file-scope static connection */
+/* The select's return value is passed straight back to the caller; results arrive via solr_virtual_get_last_uids(). */
+ return solr_connection_select(solr_conn, str_c(str),
+ solr_virtual_get_last_uids, &ctx,
+ NULL, NULL);
+}
+
static int
fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
More information about the dovecot-cvs
mailing list