dovecot-2.2: fts: Added fts_no_autofuzzy setting to require exact matches for found results.

dovecot at dovecot.org dovecot at dovecot.org
Fri Aug 8 13:22:39 UTC 2014


details:   http://hg.dovecot.org/dovecot-2.2/rev/cdf4edcc6256
changeset: 17689:cdf4edcc6256
user:      Timo Sirainen <tss at iki.fi>
date:      Fri Aug 08 16:20:31 2014 +0300
description:
fts: Added fts_no_autofuzzy setting to require exact matches for found results.
This is done by using the FTS search results only as a filter: the regular
non-FTS search is then run on the filtered messages.

diffstat:

 src/plugins/fts-lucene/fts-backend-lucene.c |  11 ++++---
 src/plugins/fts-lucene/lucene-wrapper.cc    |  40 ++++++++++++++++++----------
 src/plugins/fts-lucene/lucene-wrapper.h     |   6 ++-
 src/plugins/fts-solr/fts-backend-solr-old.c |  21 +++++++++++---
 src/plugins/fts-solr/fts-backend-solr.c     |  22 +++++++++++----
 src/plugins/fts-squat/fts-backend-squat.c   |   4 ++-
 src/plugins/fts/fts-api-private.h           |   5 ++-
 src/plugins/fts/fts-api.c                   |  12 +++++---
 src/plugins/fts/fts-api.h                   |  16 ++++++++--
 src/plugins/fts/fts-search.c                |   8 ++++-
 src/plugins/fts/fts-storage.c               |   3 ++
 src/plugins/fts/fts-storage.h               |   1 +
 12 files changed, 103 insertions(+), 46 deletions(-)

diffs (truncated from 524 to 300 lines):

diff -r f206042d6286 -r cdf4edcc6256 src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Fri Aug 08 15:15:03 2014 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Fri Aug 08 16:20:31 2014 +0300
@@ -481,7 +481,8 @@
 
 static int
 fts_backend_lucene_lookup(struct fts_backend *_backend, struct mailbox *box,
-			  struct mail_search_arg *args, bool and_args,
+			  struct mail_search_arg *args,
+			  enum fts_lookup_flags flags,
 			  struct fts_result *result)
 {
 	struct lucene_fts_backend *backend =
@@ -493,8 +494,7 @@
 	if (fts_backend_select(backend, box) < 0)
 		return -1;
 	T_BEGIN {
-		ret = lucene_index_lookup(backend->index, args, and_args,
-					  result);
+		ret = lucene_index_lookup(backend->index, args, flags, result);
 	} T_END;
 	return ret;
 }
@@ -550,7 +550,8 @@
 static int
 fts_backend_lucene_lookup_multi(struct fts_backend *_backend,
 				struct mailbox *const boxes[],
-				struct mail_search_arg *args, bool and_args,
+				struct mail_search_arg *args,
+				enum fts_lookup_flags flags,
 				struct fts_multi_result *result)
 {
 	struct lucene_fts_backend *backend =
@@ -566,7 +567,7 @@
 		ret = mailboxes_get_guids(boxes, guids, result);
 		if (ret == 0) {
 			ret = lucene_index_lookup_multi(backend->index,
-							guids, args, and_args,
+							guids, args, flags,
 							result);
 		}
 		hash_table_destroy(&guids);
diff -r f206042d6286 -r cdf4edcc6256 src/plugins/fts-lucene/lucene-wrapper.cc
--- a/src/plugins/fts-lucene/lucene-wrapper.cc	Fri Aug 08 15:15:03 2014 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc	Fri Aug 08 16:20:31 2014 +0300
@@ -1127,8 +1127,10 @@
 static bool
 lucene_add_definite_query(struct lucene_index *index,
 			  ARRAY_TYPE(lucene_query) &queries,
-			  struct mail_search_arg *arg, bool and_args)
+			  struct mail_search_arg *arg,
+			  enum fts_lookup_flags flags)
 {
+	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
 	Query *q;
 
 	if (arg->match_not && !and_args) {
@@ -1191,8 +1193,10 @@
 static bool
 lucene_add_maybe_query(struct lucene_index *index,
 		       ARRAY_TYPE(lucene_query) &queries,
-		       struct mail_search_arg *arg, bool and_args)
+		       struct mail_search_arg *arg,
+		       enum fts_lookup_flags flags)
 {
+	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
 	Query *q = NULL;
 
 	if (arg->match_not) {
@@ -1236,7 +1240,6 @@
 	else
 		lq->occur = BooleanClause::MUST_NOT;
 	return true;
-	return true;
 }
 
 static bool queries_have_non_must_nots(ARRAY_TYPE(lucene_query) &queries)
@@ -1319,7 +1322,8 @@
 }
 
 int lucene_index_lookup(struct lucene_index *index,
-			struct mail_search_arg *args, bool and_args,
+			struct mail_search_arg *args,
+			enum fts_lookup_flags flags,
 			struct fts_result *result)
 {
 	struct mail_search_arg *arg;
@@ -1332,15 +1336,18 @@
 	bool have_definites = false;
 
 	for (arg = args; arg != NULL; arg = arg->next) {
-		if (lucene_add_definite_query(index, def_queries, arg, and_args)) {
+		if (lucene_add_definite_query(index, def_queries, arg, flags)) {
 			arg->match_always = true;
 			have_definites = true;
 		}
 	}
 
 	if (have_definites) {
+		ARRAY_TYPE(seq_range) *uids_arr =
+			(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
+			&result->definite_uids : &result->maybe_uids;
 		if (lucene_index_search(index, def_queries, result,
-					&result->definite_uids) < 0)
+					uids_arr) < 0)
 			return -1;
 	}
 
@@ -1356,7 +1363,7 @@
 	bool have_maybies = false;
 
 	for (arg = args; arg != NULL; arg = arg->next) {
-		if (lucene_add_maybe_query(index, maybe_queries, arg, and_args)) {
+		if (lucene_add_maybe_query(index, maybe_queries, arg, flags)) {
 			arg->match_always = true;
 			have_maybies = true;
 		}
@@ -1374,6 +1381,7 @@
 lucene_index_search_multi(struct lucene_index *index,
 			  HASH_TABLE_TYPE(wguid_result) guids,
 			  ARRAY_TYPE(lucene_query) &queries,
+			  enum fts_lookup_flags flags,
 			  struct fts_multi_result *result)
 {
 	struct fts_score_map *score;
@@ -1421,11 +1429,14 @@
 				break;
 			}
 
-			if (!array_is_created(&br->definite_uids)) {
-				p_array_init(&br->definite_uids, result->pool, 32);
+			ARRAY_TYPE(seq_range) *uids_arr =
+				(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
+				&br->maybe_uids : &br->definite_uids;
+			if (!array_is_created(uids_arr)) {
+				p_array_init(uids_arr, result->pool, 32);
 				p_array_init(&br->scores, result->pool, 32);
 			}
-			if (seq_range_array_add(&br->definite_uids, uid)) {
+			if (seq_range_array_add(uids_arr, uid)) {
 				/* duplicate result */
 			} else {
 				score = array_append_space(&br->scores);
@@ -1443,7 +1454,8 @@
 
 int lucene_index_lookup_multi(struct lucene_index *index,
 			      HASH_TABLE_TYPE(wguid_result) guids,
-			      struct mail_search_arg *args, bool and_args,
+			      struct mail_search_arg *args,
+			      enum fts_lookup_flags flags,
 			      struct fts_multi_result *result)
 {
 	struct mail_search_arg *arg;
@@ -1456,15 +1468,15 @@
 	bool have_definites = false;
 
 	for (arg = args; arg != NULL; arg = arg->next) {
-		if (lucene_add_definite_query(index, def_queries, arg, and_args)) {
+		if (lucene_add_definite_query(index, def_queries, arg, flags)) {
 			arg->match_always = true;
 			have_definites = true;
 		}
 	}
 
 	if (have_definites) {
-		if (lucene_index_search_multi(index, guids,
-					      def_queries, result) < 0)
+		if (lucene_index_search_multi(index, guids, def_queries, flags,
+					      result) < 0)
 			return -1;
 	}
 	return 0;
diff -r f206042d6286 -r cdf4edcc6256 src/plugins/fts-lucene/lucene-wrapper.h
--- a/src/plugins/fts-lucene/lucene-wrapper.h	Fri Aug 08 15:15:03 2014 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.h	Fri Aug 08 16:20:31 2014 +0300
@@ -42,12 +42,14 @@
 int lucene_index_optimize(struct lucene_index *index);
 
 int lucene_index_lookup(struct lucene_index *index, 
-			struct mail_search_arg *args, bool and_args,
+			struct mail_search_arg *args,
+			enum fts_lookup_flags flags,
 			struct fts_result *result);
 
 int lucene_index_lookup_multi(struct lucene_index *index,
 			      HASH_TABLE_TYPE(wguid_result) guids,
-			      struct mail_search_arg *args, bool and_args,
+			      struct mail_search_arg *args,
+			      enum fts_lookup_flags flags,
 			      struct fts_multi_result *result);
 
 struct lucene_index_iter *
diff -r f206042d6286 -r cdf4edcc6256 src/plugins/fts-solr/fts-backend-solr-old.c
--- a/src/plugins/fts-solr/fts-backend-solr-old.c	Fri Aug 08 15:15:03 2014 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr-old.c	Fri Aug 08 16:20:31 2014 +0300
@@ -665,11 +665,13 @@
 
 static int
 fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
-			struct mail_search_arg *args, bool and_args,
+			struct mail_search_arg *args,
+			enum fts_lookup_flags flags,
 			struct fts_result *result)
 {
 	struct solr_fts_backend *backend =
 		(struct solr_fts_backend *)_backend;
+	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
 	struct mail_namespace *ns;
 	struct mailbox_status status;
 	string_t *str;
@@ -704,7 +706,10 @@
 	ret = solr_connection_select(backend->solr_conn, str_c(str),
 				     pool, &results);
 	if (ret == 0 && results[0] != NULL) {
-		array_append_array(&result->definite_uids, &results[0]->uids);
+		if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0)
+			array_append_array(&result->definite_uids, &results[0]->uids);
+		else
+			array_append_array(&result->maybe_uids, &results[0]->uids);
 		array_append_array(&result->scores, &results[0]->scores);
 	}
 	result->scores_sorted = TRUE;
@@ -728,6 +733,7 @@
 static int
 solr_search_multi(struct solr_fts_backend *backend, string_t *str,
 		  struct mailbox *const boxes[],
+		  enum fts_lookup_flags flags,
 		  struct fts_multi_result *result)
 {
 	struct solr_result **solr_results;
@@ -784,7 +790,10 @@
 		}
 		fts_result = array_append_space(&fts_results);
 		fts_result->box = box;
-		fts_result->definite_uids = solr_results[i]->uids;
+		if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0)
+			fts_result->definite_uids = solr_results[i]->uids;
+		else
+			fts_result->maybe_uids = solr_results[i]->uids;
 		fts_result->scores = solr_results[i]->scores;
 		fts_result->scores_sorted = TRUE;
 	}
@@ -797,9 +806,11 @@
 static int
 fts_backend_solr_lookup_multi(struct fts_backend *_backend,
 			      struct mailbox *const boxes[],
-			      struct mail_search_arg *args, bool and_args,
+			      struct mail_search_arg *args,
+			      enum fts_lookup_flags flags,
 			      struct fts_multi_result *result)
 {
+	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
 	struct solr_fts_backend *backend =
 		(struct solr_fts_backend *)_backend;
 	string_t *str;
@@ -811,7 +822,7 @@
 		    SOLR_MAX_MULTI_ROWS);
 
 	if (solr_add_definite_query_args(str, args, and_args)) {
-		if (solr_search_multi(backend, str, boxes, result) < 0)
+		if (solr_search_multi(backend, str, boxes, flags, result) < 0)
 			return -1;
 	}
 	/* FIXME: maybe_uids could be handled also with some more work.. */
diff -r f206042d6286 -r cdf4edcc6256 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Fri Aug 08 15:15:03 2014 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Fri Aug 08 16:20:31 2014 +0300
@@ -783,9 +783,11 @@
 
 static int
 fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
-			struct mail_search_arg *args, bool and_args,
+			struct mail_search_arg *args,
+			enum fts_lookup_flags flags,
 			struct fts_result *result)
 {
+	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
 	struct mailbox_status status;
 	string_t *str;
 	const char *box_guid;
@@ -801,8 +803,11 @@
 	prefix_len = str_len(str);
 
 	if (solr_add_definite_query_args(str, args, and_args)) {
+		ARRAY_TYPE(seq_range) *uids_arr =
+			(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
+			&result->definite_uids : &result->maybe_uids;
 		if (solr_search(_backend, str, box_guid,
-				&result->definite_uids, &result->scores) < 0)
+				uids_arr, &result->scores) < 0)
 			return -1;
 	}
 	str_truncate(str, prefix_len);
@@ -817,7 +822,7 @@
 
 static int
 solr_search_multi(struct fts_backend *_backend, string_t *str,
-		  struct mailbox *const boxes[],
+		  struct mailbox *const boxes[], enum fts_lookup_flags flags,
 		  struct fts_multi_result *result)


More information about the dovecot-cvs mailing list