dovecot-1.2: Initial support for getting full text search working nicely with virtual mailboxes.

dovecot at dovecot.org dovecot at dovecot.org
Sun Nov 23 03:59:59 EET 2008


details:   http://hg.dovecot.org/dovecot-1.2/rev/db78eab2ba5d
changeset: 8484:db78eab2ba5d
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Nov 23 03:59:55 2008 +0200
description:
Initial support for getting full text search working nicely with virtual mailboxes.

diffstat:

15 files changed, 206 insertions(+), 24 deletions(-)
src/lib-storage/index/cydir/cydir-storage.c     |    1 
src/lib-storage/index/dbox/dbox-storage.c       |    1 
src/lib-storage/index/maildir/maildir-storage.c |    1 
src/lib-storage/index/mbox/mbox-storage.c       |    1 
src/lib-storage/index/raw/raw-storage.c         |    1 
src/lib-storage/mail-storage-private.h          |    4 +
src/lib-storage/mail-storage.c                  |   10 +++
src/lib-storage/mail-storage.h                  |    5 +
src/plugins/fts-solr/fts-backend-solr.c         |   47 ++++++++++++---
src/plugins/fts-solr/solr-connection.c          |   68 ++++++++++++++++++-----
src/plugins/fts-solr/solr-connection.h          |    5 +
src/plugins/fts/fts-storage.c                   |   33 ++++++++++-
src/plugins/fts/fts-storage.h                   |    1 
src/plugins/virtual/virtual-storage.c           |   49 ++++++++++++++++
src/plugins/virtual/virtual-storage.h           |    3 +

diffs (truncated from 524 to 300 lines):

diff -r b12705704329 -r db78eab2ba5d src/lib-storage/index/cydir/cydir-storage.c
--- a/src/lib-storage/index/cydir/cydir-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/cydir/cydir-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -446,6 +446,7 @@ struct mailbox cydir_mailbox = {
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/index/dbox/dbox-storage.c
--- a/src/lib-storage/index/dbox/dbox-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/dbox/dbox-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -717,6 +717,7 @@ struct mailbox dbox_mailbox = {
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		dbox_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/index/maildir/maildir-storage.c
--- a/src/lib-storage/index/maildir/maildir-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/maildir/maildir-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -1105,6 +1105,7 @@ struct mailbox maildir_mailbox = {
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/index/mbox/mbox-storage.c
--- a/src/lib-storage/index/mbox/mbox-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/mbox/mbox-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -1027,6 +1027,7 @@ struct mailbox mbox_mailbox = {
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/index/raw/raw-storage.c
--- a/src/lib-storage/index/raw/raw-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/raw/raw-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -296,6 +296,7 @@ struct mailbox raw_mailbox = {
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/mail-storage-private.h
--- a/src/lib-storage/mail-storage-private.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage-private.h	Sun Nov 23 03:59:55 2008 +0200
@@ -138,6 +138,10 @@ struct mailbox_vfuncs {
 	bool (*get_expunged_uids)(struct mailbox *box, uint64_t modseq,
 				  const ARRAY_TYPE(seq_range) *uids,
 				  ARRAY_TYPE(seq_range) *expunged_uids);
+	bool (*get_virtual_uid)(struct mailbox *box,
+				const char *backend_mailbox,
+				uint32_t backend_uidvalidity,
+				uint32_t backend_uid, uint32_t *uid_r);
 
 	struct mail *
 		(*mail_alloc)(struct mailbox_transaction_context *t,
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/mail-storage.c
--- a/src/lib-storage/mail-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -630,6 +630,16 @@ bool mailbox_get_expunged_uids(struct ma
 	return box->v.get_expunged_uids(box, modseq, uids, expunged_uids);
 }
 
+bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
+			     uint32_t backend_uidvalidity,
+			     uint32_t backend_uid, uint32_t *uid_r)
+{
+	if (box->v.get_virtual_uid == NULL)
+		return FALSE;
+	return box->v.get_virtual_uid(box, backend_mailbox, backend_uidvalidity,
+				      backend_uid, uid_r);
+}
+
 struct mailbox_header_lookup_ctx *
 mailbox_header_lookup_init(struct mailbox *box, const char *const headers[])
 {
diff -r b12705704329 -r db78eab2ba5d src/lib-storage/mail-storage.h
--- a/src/lib-storage/mail-storage.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage.h	Sun Nov 23 03:59:55 2008 +0200
@@ -416,6 +416,11 @@ bool mailbox_get_expunged_uids(struct ma
 bool mailbox_get_expunged_uids(struct mailbox *box, uint64_t modseq,
 			       const ARRAY_TYPE(seq_range) *uids,
 			       ARRAY_TYPE(seq_range) *expunged_uids);
+/* If box is a virtual mailbox, look up UID for the given backend message.
+   Returns TRUE if found, FALSE if not. */
+bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
+			     uint32_t backend_uidvalidity,
+			     uint32_t backend_uid, uint32_t *uid_r);
 
 /* Initialize header lookup for given headers. */
 struct mailbox_header_lookup_ctx *
diff -r b12705704329 -r db78eab2ba5d src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sun Nov 23 03:59:55 2008 +0200
@@ -11,6 +11,7 @@
 #include <curl/curl.h>
 
 #define SOLR_CMDBUF_SIZE (1024*64)
+#define SOLR_MAX_ROWS 100000
 
 struct solr_fts_backend_build_context {
 	struct fts_backend_build_context ctx;
@@ -97,7 +98,8 @@ static int fts_backend_solr_get_last_uid
 	solr_quote_str(str, backend->box->storage->ns->user->username);
 
 	t_array_init(&uids, 1);
-	if (solr_connection_select(solr_conn, str_c(str), &uids, NULL) < 0)
+	if (solr_connection_select(solr_conn, str_c(str),
+				   NULL, NULL, &uids, NULL) < 0)
 		return -1;
 
 	uidvals = array_get(&uids, &count);
@@ -256,6 +258,15 @@ static void fts_backend_solr_unlock(stru
 {
 }
 
+static bool solr_virtual_uid_map(const char *mailbox, uint32_t uidvalidity,
+				 uint32_t *uid, void *context)
+{
+	struct mailbox *box = context;
+
+	return mailbox_get_virtual_uid(box, mailbox, uidvalidity,
+				       *uid, uid);
+}
+
 static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
 				   ARRAY_TYPE(seq_range) *definite_uids,
 				   ARRAY_TYPE(seq_range) *maybe_uids,
@@ -266,12 +277,20 @@ static int fts_backend_solr_lookup(struc
 	unsigned int i, count;
 	struct mailbox_status status;
 	string_t *str;
-
+	bool virtual;
+
+	virtual = strcmp(box->storage->name, "virtual") == 0;
 	mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
 
 	str = t_str_new(256);
-	str_printfa(str, "fl=uid,score&rows=%u&sort=uid%%20asc&q=",
-		    status.uidnext);
+	if (!virtual) {
+		str_printfa(str, "fl=uid,score&rows=%u&sort=uid%%20asc&q=",
+			    status.uidnext);
+	} else {
+		str_printfa(str, "fl=uid,score,box,uidv&rows=%u"
+			    "&sort=box%%20asc,uid%%20asc&q=",
+			    SOLR_MAX_ROWS);
+	}
 
 	/* build a lucene search query from the fields */
 	fields = array_get(&ctx->fields, &count);
@@ -298,14 +317,24 @@ static int fts_backend_solr_lookup(struc
 
 	/* use a separate filter query for selecting the mailbox. it shouldn't
 	   affect the score and there could be some caching benefits too. */
-	str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity);
-	solr_quote_str(str, box->name);
-	str_append(str, "%20user:");
+	str_append(str, "&fq=user:");
 	solr_quote_str(str, box->storage->ns->user->username);
 
+	/* FIXME: limit what mailboxes to search with virtual storage */
+	if (!virtual) {
+		str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity);
+		solr_quote_str(str, box->name);
+	}
+
 	array_clear(maybe_uids);
-	return solr_connection_select(solr_conn, str_c(str),
-				      definite_uids, scores);
+	if (!virtual) {
+		return solr_connection_select(solr_conn, str_c(str), NULL, NULL,
+					      definite_uids, scores);
+	} else {
+		return solr_connection_select(solr_conn, str_c(str),
+					      solr_virtual_uid_map, box,
+					      definite_uids, scores);
+	}
 }
 
 struct fts_backend fts_backend_solr = {
diff -r b12705704329 -r db78eab2ba5d src/plugins/fts-solr/solr-connection.c
--- a/src/plugins/fts-solr/solr-connection.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts-solr/solr-connection.c	Sun Nov 23 03:59:55 2008 +0200
@@ -22,7 +22,9 @@ enum solr_xml_content_state {
 enum solr_xml_content_state {
 	SOLR_XML_CONTENT_STATE_NONE = 0,
 	SOLR_XML_CONTENT_STATE_UID,
-	SOLR_XML_CONTENT_STATE_SCORE
+	SOLR_XML_CONTENT_STATE_SCORE,
+	SOLR_XML_CONTENT_STATE_MAILBOX,
+	SOLR_XML_CONTENT_STATE_UIDVALIDITY
 };
 
 struct solr_lookup_xml_context {
@@ -30,8 +32,12 @@ struct solr_lookup_xml_context {
 	enum solr_xml_content_state content_state;
 	int depth;
 
-	uint32_t uid;
+	uint32_t uid, uidvalidity;
 	float score;
+	char *mailbox;
+
+	solr_uid_map_callback_t *callback;
+	void *context;
 
 	ARRAY_TYPE(seq_range) *uids;
 	ARRAY_TYPE(fts_score_map) *scores;
@@ -234,6 +240,8 @@ solr_lookup_xml_start(void *context, con
 			ctx->state++;
 			ctx->uid = 0;
 			ctx->score = 0;
+			i_free_and_null(ctx->mailbox);
+			ctx->uidvalidity = 0;
 		}
 		break;
 	case SOLR_XML_RESPONSE_STATE_DOC:
@@ -242,6 +250,10 @@ solr_lookup_xml_start(void *context, con
 			ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
 		else if (strcmp(name_attr, "score") == 0)
 			ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
+		else if (strcmp(name_attr, "box") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_MAILBOX;
+		else if (strcmp(name_attr, "uidv") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY;
 		else 
 			break;
 		ctx->state++;
@@ -256,8 +268,18 @@ static void solr_lookup_add_doc(struct s
 	struct fts_score_map *score;
 
 	if (ctx->uid == 0) {
-		i_error("fts_solr: missing uid");
+		i_error("fts_solr: Query didn't return uid");
 		return;
+	}
+
+	if (ctx->callback != NULL) {
+		if (ctx->mailbox == NULL) {
+			i_error("fts_solr: Query didn't return mailbox");
+			return;
+		}
+		if (!ctx->callback(ctx->mailbox, ctx->uidvalidity,
+				   &ctx->uid, ctx->context))
+			return;
 	}
 
 	seq_range_array_add(ctx->uids, 0, ctx->uid);
@@ -283,36 +305,52 @@ static void solr_lookup_xml_end(void *co
 	ctx->depth--;
 }
 
+static int uint32_parse(const char *str, int len, uint32_t *value_r)
+{
+	uint32_t value = 0;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (str[i] < '0' || str[i] > '9')
+			break;
+		value = value*10 + str[i]-'0';
+	}
+	if (i != len)
+		return -1;
+
+	*value_r = value;
+	return 0;
+}
+
 static void solr_lookup_xml_data(void *context, const char *str, int len)
 {
 	struct solr_lookup_xml_context *ctx = context;
-	uint32_t uid;
-	int i;
 
 	switch (ctx->content_state) {
 	case SOLR_XML_CONTENT_STATE_NONE:
 		break;
 	case SOLR_XML_CONTENT_STATE_UID:
-		for (i = 0, uid = 0; i < len; i++) {
-			if (str[i] < '0' || str[i] > '9')
-				break;
-			uid = uid*10 + str[i]-'0';
-		}
-		if (i != len) {


More information about the dovecot-cvs mailing list