dovecot-1.2: Finished making full text search indexes work fast with virtual mailboxes.

dovecot at dovecot.org dovecot at dovecot.org
Sun Nov 30 01:27:25 EET 2008


details:   http://hg.dovecot.org/dovecot-1.2/rev/ffb37c392166
changeset: 8495:ffb37c392166
user:      Timo Sirainen <tss at iki.fi>
date:      Sat Nov 29 22:39:44 2008 +0200
description:
Finished making full text search indexes work fast with virtual mailboxes.

diffstat:

21 files changed, 486 insertions(+), 40 deletions(-)
src/lib-storage/index/cydir/cydir-storage.c     |    2 
src/lib-storage/index/dbox/dbox-storage.c       |    2 
src/lib-storage/index/maildir/maildir-storage.c |    2 
src/lib-storage/index/mbox/mbox-storage.c       |    2 
src/lib-storage/index/raw/raw-storage.c         |    2 
src/lib-storage/mail-storage-private.h          |    6 
src/lib-storage/mail-storage.c                  |   23 ++
src/lib-storage/mail-storage.h                  |   13 +
src/plugins/fts-lucene/fts-backend-lucene.c     |    1 
src/plugins/fts-solr/fts-backend-solr.c         |  138 ++++++++++++-
src/plugins/fts-solr/solr-connection.c          |   10 
src/plugins/fts-solr/solr-connection.h          |    2 
src/plugins/fts-squat/fts-backend-squat.c       |    1 
src/plugins/fts/fts-api-private.h               |    2 
src/plugins/fts/fts-api.c                       |    6 
src/plugins/fts/fts-api.h                       |   15 +
src/plugins/fts/fts-storage.c                   |  231 ++++++++++++++++++++---
src/plugins/fts/fts-storage.h                   |   11 +
src/plugins/virtual/virtual-config.c            |   25 ++
src/plugins/virtual/virtual-storage.c           |   29 ++
src/plugins/virtual/virtual-storage.h           |    3 

diffs (truncated from 906 to 300 lines):

diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/cydir/cydir-storage.c
--- a/src/lib-storage/index/cydir/cydir-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/cydir/cydir-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -447,6 +447,8 @@ struct mailbox cydir_mailbox = {
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
 		NULL,
+		NULL,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/dbox/dbox-storage.c
--- a/src/lib-storage/index/dbox/dbox-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/dbox/dbox-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -718,6 +718,8 @@ struct mailbox dbox_mailbox = {
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
 		NULL,
+		NULL,
+		NULL,
 		dbox_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/maildir/maildir-storage.c
--- a/src/lib-storage/index/maildir/maildir-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/maildir/maildir-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -1106,6 +1106,8 @@ struct mailbox maildir_mailbox = {
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
 		NULL,
+		NULL,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/mbox/mbox-storage.c
--- a/src/lib-storage/index/mbox/mbox-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/mbox/mbox-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -1028,6 +1028,8 @@ struct mailbox mbox_mailbox = {
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
 		NULL,
+		NULL,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/index/raw/raw-storage.c
--- a/src/lib-storage/index/raw/raw-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/index/raw/raw-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -297,6 +297,8 @@ struct mailbox raw_mailbox = {
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
 		NULL,
+		NULL,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage-private.h
--- a/src/lib-storage/mail-storage-private.h	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage-private.h	Sat Nov 29 22:39:44 2008 +0200
@@ -142,6 +142,12 @@ struct mailbox_vfuncs {
 				const char *backend_mailbox,
 				uint32_t backend_uidvalidity,
 				uint32_t backend_uid, uint32_t *uid_r);
+	void (*get_virtual_backend_boxes)(struct mailbox *box,
+					  ARRAY_TYPE(mailboxes) *mailboxes,
+					  bool only_with_msgs);
+	void (*get_virtual_box_patterns)(struct mailbox *box,
+					 ARRAY_TYPE(const_string) *includes,
+					 ARRAY_TYPE(const_string) *excludes);
 
 	struct mail *
 		(*mail_alloc)(struct mailbox_transaction_context *t,
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage.c
--- a/src/lib-storage/mail-storage.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage.c	Sat Nov 29 22:39:44 2008 +0200
@@ -640,6 +640,29 @@ bool mailbox_get_virtual_uid(struct mail
 				      backend_uid, uid_r);
 }
 
+void mailbox_get_virtual_backend_boxes(struct mailbox *box,
+				       ARRAY_TYPE(mailboxes) *mailboxes,
+				       bool only_with_msgs)
+{
+	if (box->v.get_virtual_backend_boxes == NULL)
+		array_append(mailboxes, &box, 1);
+	else
+		box->v.get_virtual_backend_boxes(box, mailboxes, only_with_msgs);
+}
+
+void mailbox_get_virtual_box_patterns(struct mailbox *box,
+				      ARRAY_TYPE(const_string) *includes,
+				      ARRAY_TYPE(const_string) *excludes)
+{
+	if (box->v.get_virtual_box_patterns == NULL) {
+		const char *name = box->name;
+
+		array_append(includes, &name, 1);
+	} else {
+		box->v.get_virtual_box_patterns(box, includes, excludes);
+	}
+}
+
 struct mailbox_header_lookup_ctx *
 mailbox_header_lookup_init(struct mailbox *box, const char *const headers[])
 {
diff -r 57e704c361a7 -r ffb37c392166 src/lib-storage/mail-storage.h
--- a/src/lib-storage/mail-storage.h	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/lib-storage/mail-storage.h	Sat Nov 29 22:39:44 2008 +0200
@@ -234,6 +234,8 @@ struct mail_storage_callbacks {
 
 };
 
+ARRAY_DEFINE_TYPE(mailboxes, struct mailbox *);
+
 typedef void mailbox_notify_callback_t(struct mailbox *box, void *context);
 
 void mail_storage_init(void);
@@ -421,6 +423,17 @@ bool mailbox_get_virtual_uid(struct mail
 bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
 			     uint32_t backend_uidvalidity,
 			     uint32_t backend_uid, uint32_t *uid_r);
+/* If box is a virtual mailbox, return all backend mailboxes. If
+   only_with_msgs=TRUE, return only those mailboxes that have at least one
+   message existing in the virtual mailbox. */
+void mailbox_get_virtual_backend_boxes(struct mailbox *box,
+				       ARRAY_TYPE(mailboxes) *mailboxes,
+				       bool only_with_msgs);
+/* If mailbox is a virtual mailbox, return all mailbox list patterns that
+   are used to figure out which mailboxes belong to the virtual mailbox. */
+void mailbox_get_virtual_box_patterns(struct mailbox *box,
+				      ARRAY_TYPE(const_string) *includes,
+				      ARRAY_TYPE(const_string) *excludes);
 
 /* Initialize header lookup for given headers. */
 struct mailbox_header_lookup_ctx *
diff -r 57e704c361a7 -r ffb37c392166 src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Sat Nov 29 22:39:44 2008 +0200
@@ -210,6 +210,7 @@ struct fts_backend fts_backend_lucene = 
 		fts_backend_lucene_init,
 		fts_backend_lucene_deinit,
 		fts_backend_lucene_get_last_uid,
+		NULL,
 		fts_backend_lucene_build_init,
 		fts_backend_lucene_build_more,
 		fts_backend_lucene_build_deinit,
diff -r 57e704c361a7 -r ffb37c392166 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Sat Nov 29 22:39:04 2008 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sat Nov 29 22:39:44 2008 +0200
@@ -3,12 +3,13 @@
 #include "lib.h"
 #include "array.h"
 #include "str.h"
+#include "strescape.h"
 #include "mail-storage-private.h"
 #include "mail-namespace.h"
 #include "solr-connection.h"
 #include "fts-solr-plugin.h"
 
-#include <curl/curl.h>
+#include <ctype.h>
 
 #define SOLR_CMDBUF_SIZE (1024*64)
 #define SOLR_MAX_ROWS 100000
@@ -25,6 +26,11 @@ struct solr_fts_backend_build_context {
 	uint32_t prev_uid, uid_validity;
 	string_t *cmd;
 	bool headers;
+};
+
+struct fts_backend_solr_get_last_uids_context {
+	pool_t pool;
+	ARRAY_TYPE(fts_backend_uid_map) *last_uids;
 };
 
 static struct solr_connection *solr_conn = NULL;
@@ -199,6 +205,120 @@ static int fts_backend_solr_get_last_uid
 	return 0;
 }
 
+static bool
+solr_virtual_get_last_uids(const char *mailbox, uint32_t uidvalidity,
+			   uint32_t *uid, void *context)
+{
+	struct fts_backend_solr_get_last_uids_context *ctx = context;
+	struct fts_backend_uid_map *map;
+
+	map = array_append_space(ctx->last_uids);
+	map->mailbox = p_strdup(ctx->pool, mailbox);
+	map->uidvalidity = uidvalidity;
+	map->uid = *uid;
+	return FALSE;
+}
+
+static void add_pattern_as_solr(string_t *str, const char *pattern)
+{
+	const char *p;
+
+	/* first check if there are any wildcards in the pattern */
+	for (p = pattern; *p != '\0'; p++) {
+		if (*p == '%' || *p == '*')
+			break;
+	}
+	if (*p == '\0') {
+		/* full mailbox name */
+		str_append_c(str, '"');
+		str_append(str, str_escape(pattern));
+		str_append_c(str, '"');
+		return;
+	}
+
+	/* there are at least some wildcards. */
+	for (p = pattern; *p != '\0'; p++) {
+		if (*p == '%' || *p == '*') {
+			if (p == pattern || (p[-1] != '%' && p[-1] != '*'))
+				str_append_c(str, '*');
+		} else {
+			if (!i_isalnum(*p))
+				str_append_c(str, '\\');
+			str_append_c(str, *p);
+		}
+	}
+}
+
+static void
+fts_backend_solr_filter_mailboxes(struct solr_connection *solr_conn,
+				  string_t *str, struct mailbox *box)
+{
+	ARRAY_TYPE(const_string) includes_arr, excludes_arr;
+	const char *const *includes, *const *excludes;
+	unsigned int i, inc_count, exc_count;
+	string_t *fq;
+
+	t_array_init(&includes_arr, 16);
+	t_array_init(&excludes_arr, 16);
+	mailbox_get_virtual_box_patterns(box, &includes_arr, &excludes_arr);
+	includes = array_get(&includes_arr, &inc_count);
+	excludes = array_get(&excludes_arr, &exc_count);
+	i_assert(inc_count > 0);
+
+	/* First see if there are any patterns that begin with a wildcard.
+	   Solr doesn't allow them, so in that case we'll need to return
+	   all mailboxes. */
+	for (i = 0; i < inc_count; i++) {
+		if (*includes[i] == '*' || *includes[i] == '%')
+			break;
+	}
+
+	fq = t_str_new(128);
+	if (i == inc_count) {
+		/* we can filter what mailboxes we want returned */
+		str_append_c(fq, '(');
+		for (i = 0; i < inc_count; i++) {
+			if (i != 0)
+				str_append(fq, " OR ");
+			str_append(fq, "box:");
+			add_pattern_as_solr(fq, includes[i]);
+		}
+		str_append_c(fq, ')');
+	}
+	for (i = 0; i < exc_count; i++) {
+		if (str_len(fq) > 0)
+			str_append_c(fq, ' ');
+		str_append(fq, "-box:");
+		add_pattern_as_solr(fq, excludes[i]);
+	}
+	if (str_len(fq) > 0) {
+		str_append(str, "&fq=");
+		solr_connection_http_escape(solr_conn, str, str_c(fq));
+	}
+}
+
+static int
+fts_backend_solr_get_all_last_uids(struct fts_backend *backend, pool_t pool,
+				   ARRAY_TYPE(fts_backend_uid_map) *last_uids)
+{
+	struct fts_backend_solr_get_last_uids_context ctx;
+	string_t *str;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.pool = pool;
+	ctx.last_uids = last_uids;
+
+	str = t_str_new(256);
+	str_printfa(str, "fl=uid,box,uidv&rows=%u&q=last_uid:TRUE%%20user:",
+		    SOLR_MAX_ROWS);
+	solr_quote_str(str, backend->box->storage->ns->user->username);
+	fts_backend_solr_filter_mailboxes(solr_conn, str, backend->box);
+
+	return solr_connection_select(solr_conn, str_c(str),
+				      solr_virtual_get_last_uids, &ctx,
+				      NULL, NULL);
+}
+
 static int
 fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,


More information about the dovecot-cvs mailing list