dovecot-1.1: fts: Added a new lookup API where the backend can look up all the fields using a single query.

dovecot at dovecot.org dovecot at dovecot.org
Sun Jul 13 18:16:28 EEST 2008


details:   http://hg.dovecot.org/dovecot-1.1/rev/5b53bd0d8f6d
changeset: 7753:5b53bd0d8f6d
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Jul 13 18:16:21 2008 +0300
description:
fts: Added a new lookup API where the backend can look up all the fields
using a single query. Implemented it in fts-solr.

diffstat:

8 files changed, 246 insertions(+), 109 deletions(-)
src/plugins/fts-lucene/fts-backend-lucene.c |    1 
src/plugins/fts-solr/fts-backend-solr.c     |   68 ++++++----
src/plugins/fts-squat/fts-backend-squat.c   |    1 
src/plugins/fts/fts-api-private.h           |   21 +++
src/plugins/fts/fts-api.c                   |  172 ++++++++++++++++++---------
src/plugins/fts/fts-api.h                   |   22 +--
src/plugins/fts/fts-search.c                |   69 +++++++---
src/plugins/fts/fts-storage.h               |    1 

diffs (truncated from 517 to 300 lines):

diff -r c6d592c69bfd -r 5b53bd0d8f6d src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Sun Jul 13 18:16:21 2008 +0300
@@ -216,6 +216,7 @@ struct fts_backend fts_backend_lucene = 
 		fts_backend_lucene_lock,
 		fts_backend_lucene_unlock,
 		fts_backend_lucene_lookup,
+		NULL,
 		NULL
 	}
 };
diff -r c6d592c69bfd -r 5b53bd0d8f6d src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sun Jul 13 18:16:21 2008 +0300
@@ -255,37 +255,50 @@ static void fts_backend_solr_unlock(stru
 {
 }
 
-static int
-fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
-			enum fts_lookup_flags flags,
-			ARRAY_TYPE(seq_range) *definite_uids,
-			ARRAY_TYPE(seq_range) *maybe_uids)
-{
+static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
+				   ARRAY_TYPE(seq_range) *definite_uids,
+				   ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	struct mailbox *box = ctx->backend->box;
+	const struct fts_backend_lookup_field *fields;
+	unsigned int i, count;
 	struct mailbox_status status;
 	string_t *str;
 
-	i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
+	mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
 
 	str = t_str_new(256);
-	str_append(str, "fl=uid&q=");
-	if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
-		/* body only */
-		i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
-		str_append(str, "body:");
-	} else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
-		/* header only */
-		str_append(str, "hdr:");
-	} else {
-		/* both */
-		str_append(str, "any:");
-	}
-	solr_quote_str(str, key);
-
-	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
-	str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity);
-	solr_quote_str(str, backend->box->name);
+	str_printfa(str, "fl=uid&rows=%u&q=", status.uidnext);
+
+	/* build a lucene search query from the fields */
+	fields = array_get(&ctx->fields, &count);
+	for (i = 0; i < count; i++) {
+		if (i > 0)
+			str_append(str, "%20");
+
+		if ((fields[i].flags & FTS_LOOKUP_FLAG_INVERT) != 0)
+			str_append_c(str, '-');
+
+		if ((fields[i].flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
+			/* body only */
+			i_assert((fields[i].flags & FTS_LOOKUP_FLAG_BODY) != 0);
+			str_append(str, "body:");
+		} else if ((fields[i].flags & FTS_LOOKUP_FLAG_BODY) == 0) {
+			/* header only */
+			str_append(str, "hdr:");
+		} else {
+			/* both */
+			str_append(str, "any:");
+		}
+		solr_quote_str(str, fields[i].key);
+	}
+
+	/* use a separate filter query for selecting the mailbox. it shouldn't
+	   affect the score and there could be some caching benefits too. */
+	str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity);
+	solr_quote_str(str, box->name);
 	str_append(str, "%20user:");
-	solr_quote_str(str, backend->box->storage->user);
+	solr_quote_str(str, box->storage->user);
 
 	array_clear(maybe_uids);
 	return solr_connection_select(solr_conn, str_c(str), definite_uids);
@@ -306,7 +319,8 @@ struct fts_backend fts_backend_solr = {
 		fts_backend_solr_expunge_finish,
 		fts_backend_solr_lock,
 		fts_backend_solr_unlock,
-		fts_backend_solr_lookup,
-		NULL
+		NULL,
+		NULL,
+		fts_backend_solr_lookup
 	}
 };
diff -r c6d592c69bfd -r 5b53bd0d8f6d src/plugins/fts-squat/fts-backend-squat.c
--- a/src/plugins/fts-squat/fts-backend-squat.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Sun Jul 13 18:16:21 2008 +0300
@@ -250,6 +250,7 @@ struct fts_backend fts_backend_squat = {
 		fts_backend_squat_lock,
 		fts_backend_squat_unlock,
 		fts_backend_squat_lookup,
+		NULL,
 		NULL
 	}
 };
diff -r c6d592c69bfd -r 5b53bd0d8f6d src/plugins/fts/fts-api-private.h
--- a/src/plugins/fts/fts-api-private.h	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-api-private.h	Sun Jul 13 18:16:21 2008 +0300
@@ -30,6 +30,10 @@ struct fts_backend_vfuncs {
 		      enum fts_lookup_flags flags,
 		      ARRAY_TYPE(seq_range) *definite_uids,
 		      ARRAY_TYPE(seq_range) *maybe_uids);
+
+	int (*lookup2)(struct fts_backend_lookup_context *ctx,
+		       ARRAY_TYPE(seq_range) *definite_uids,
+		       ARRAY_TYPE(seq_range) *maybe_uids);
 };
 
 enum fts_backend_flags {
@@ -56,7 +60,24 @@ struct fts_backend_build_context {
 	unsigned int failed:1;
 };
 
+struct fts_backend_lookup_field {
+	const char *key;
+	enum fts_lookup_flags flags;
+};
+
+struct fts_backend_lookup_context {
+	struct fts_backend *backend;
+	pool_t pool;
+
+	ARRAY_DEFINE(fields, struct fts_backend_lookup_field);
+};
+
 void fts_backend_register(const struct fts_backend *backend);
 void fts_backend_unregister(const char *name);
 
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+		     const ARRAY_TYPE(seq_range) *definite_filter,
+		     ARRAY_TYPE(seq_range) *maybe_dest,
+		     const ARRAY_TYPE(seq_range) *maybe_filter);
+
 #endif
diff -r c6d592c69bfd -r 5b53bd0d8f6d src/plugins/fts/fts-api.c
--- a/src/plugins/fts/fts-api.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-api.c	Sun Jul 13 18:16:21 2008 +0300
@@ -144,6 +144,55 @@ void fts_backend_unlock(struct fts_backe
 	backend->v.unlock(backend);
 }
 
+static void
+fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
+		  const ARRAY_TYPE(seq_range) *dest_definite,
+		  const ARRAY_TYPE(seq_range) *src_maybe,
+		  const ARRAY_TYPE(seq_range) *src_definite)
+{
+	ARRAY_TYPE(seq_range) src_unwanted;
+	const struct seq_range *range;
+	struct seq_range new_range;
+	unsigned int i, count;
+	uint32_t seq;
+
+	/* add/leave to dest_maybe if at least one list has maybe,
+	   and no lists have none */
+
+	/* create unwanted sequences list from both sources */
+	t_array_init(&src_unwanted, 128);
+	new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
+	array_append(&src_unwanted, &new_range, 1);
+	seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
+	seq_range_array_remove_seq_range(&src_unwanted, src_definite);
+
+	/* drop unwanted uids */
+	seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);
+
+	/* add uids that are in dest_definite and src_maybe lists */
+	range = array_get(dest_definite, &count);
+	for (i = 0; i < count; i++) {
+		for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
+			if (seq_range_exists(src_maybe, seq))
+				seq_range_array_add(dest_maybe, 0, seq);
+		}
+	}
+}
+
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+		     const ARRAY_TYPE(seq_range) *definite_filter,
+		     ARRAY_TYPE(seq_range) *maybe_dest,
+		     const ARRAY_TYPE(seq_range) *maybe_filter)
+{
+	T_BEGIN {
+		fts_merge_maybies(maybe_dest, definite_dest,
+				  maybe_filter, definite_filter);
+	} T_END;
+	/* keep only what exists in both lists. the rest is in
+	   maybies or not wanted */
+	seq_range_array_remove_invert_range(definite_dest, definite_filter);
+}
+
 static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids,
 			      const ARRAY_TYPE(seq_range) *maybe_uids)
 {
@@ -155,10 +204,10 @@ static void fts_lookup_invert(ARRAY_TYPE
 	(void)seq_range_array_remove_seq_range(definite_uids, maybe_uids);
 }
 
-int fts_backend_lookup(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids)
+static int fts_backend_lookup(struct fts_backend *backend, const char *key,
+			      enum fts_lookup_flags flags,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids)
 {
 	int ret;
 
@@ -171,45 +220,10 @@ int fts_backend_lookup(struct fts_backen
 	return 0;
 }
 
-static void
-fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
-		  const ARRAY_TYPE(seq_range) *dest_definite,
-		  const ARRAY_TYPE(seq_range) *src_maybe,
-		  const ARRAY_TYPE(seq_range) *src_definite)
-{
-	ARRAY_TYPE(seq_range) src_unwanted;
-	const struct seq_range *range;
-	struct seq_range new_range;
-	unsigned int i, count;
-	uint32_t seq;
-
-	/* add/leave to dest_maybe if at least one list has maybe,
-	   and no lists have none */
-
-	/* create unwanted sequences list from both sources */
-	t_array_init(&src_unwanted, 128);
-	new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
-	array_append(&src_unwanted, &new_range, 1);
-	seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
-	seq_range_array_remove_seq_range(&src_unwanted, src_definite);
-
-	/* drop unwanted uids */
-	seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);
-
-	/* add uids that are in dest_definite and src_maybe lists */
-	range = array_get(dest_definite, &count);
-	for (i = 0; i < count; i++) {
-		for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
-			if (seq_range_exists(src_maybe, seq))
-				seq_range_array_add(dest_maybe, 0, seq);
-		}
-	}
-}
-
-int fts_backend_filter(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids)
+static int fts_backend_filter(struct fts_backend *backend, const char *key,
+			      enum fts_lookup_flags flags,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids)
 {
 	ARRAY_TYPE(seq_range) tmp_definite, tmp_maybe;
 	int ret;
@@ -228,16 +242,72 @@ int fts_backend_filter(struct fts_backen
 		array_clear(definite_uids);
 		array_clear(maybe_uids);
 	} else {
-		T_BEGIN {
-			fts_merge_maybies(maybe_uids, definite_uids,
-					  &tmp_maybe, &tmp_definite);
-		} T_END;
-		/* keep only what exists in both lists. the rest is in
-		   maybies or not wanted */
-		seq_range_array_remove_invert_range(definite_uids,
-						    &tmp_definite);
+		fts_filter_uids(definite_uids, &tmp_definite,
+				maybe_uids, &tmp_maybe);
 	}
 	array_free(&tmp_maybe);
 	array_free(&tmp_definite);
 	return ret;
 }
+
+struct fts_backend_lookup_context *
+fts_backend_lookup_init(struct fts_backend *backend)
+{
+	struct fts_backend_lookup_context *ctx;
+	pool_t pool;


More information about the dovecot-cvs mailing list