dovecot-2.2: lib-storage: mail_search_args_simplify() now handles SEARCH_HEADER*/TEXT/BODY

dovecot at dovecot.org dovecot at dovecot.org
Thu Apr 23 18:21:32 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/c5751819176f
changeset: 18465:c5751819176f
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Apr 23 21:19:54 2015 +0300
description:
lib-storage: mail_search_args_simplify() now handles SEARCH_HEADER*/TEXT/BODY
Merging duplicates of these args is especially useful with lib-fts, where
stemming and other filtering can produce duplicate search words. There is some
internal deduplication, but it doesn't catch all of them.

diffstat:

 src/lib-storage/mail-search-args-simplify.c      |  103 +++++++++++++++++-----
 src/lib-storage/test-mail-search-args-simplify.c |   18 +++-
 2 files changed, 97 insertions(+), 24 deletions(-)

diffs (228 lines):

diff -r b2c20d9ff296 -r c5751819176f src/lib-storage/mail-search-args-simplify.c
--- a/src/lib-storage/mail-search-args-simplify.c	Thu Apr 23 21:00:43 2015 +0300
+++ b/src/lib-storage/mail-search-args-simplify.c	Thu Apr 23 21:19:54 2015 +0300
@@ -5,50 +5,80 @@
 #include "mail-search.h"
 
 struct mail_search_simplify_prev_arg {
-	struct mail_search_arg mask;
+	struct {
+		enum mail_search_arg_type type;
+		enum mail_search_arg_flag search_flags;
+		enum mail_search_date_type date_type;
+		bool match_not;
+		bool fuzzy;
+	} bin_mask;
+	const char *hdr_field_name_mask;
+	const char *str_mask;
+
 	struct mail_search_arg *prev_arg;
 };
 
 struct mail_search_simplify_ctx {
 	pool_t pool;
 	/* arg mask => prev_arg */
-	HASH_TABLE(struct mail_search_arg *,
+	HASH_TABLE(struct mail_search_simplify_prev_arg *,
 		   struct mail_search_simplify_prev_arg *) prev_args;
 	bool parent_and;
 	bool removals;
 };
 
-static int mail_search_arg_cmp(const struct mail_search_arg *arg1,
-			       const struct mail_search_arg *arg2)
+static int
+mail_search_simplify_prev_arg_cmp(const struct mail_search_simplify_prev_arg *arg1,
+				  const struct mail_search_simplify_prev_arg *arg2)
 {
-	return memcmp(arg1, arg2, sizeof(*arg1));
+	int ret;
+
+	ret = memcmp(&arg1->bin_mask, &arg2->bin_mask, sizeof(arg1->bin_mask));
+	if (ret == 0)
+		ret = null_strcmp(arg1->hdr_field_name_mask, arg2->hdr_field_name_mask);
+	if (ret == 0)
+		ret = null_strcmp(arg1->str_mask, arg2->str_mask);
+	return ret;
 }
 
-static unsigned int mail_search_arg_hash(const struct mail_search_arg *arg)
+static unsigned int
+mail_search_simplify_prev_arg_hash(const struct mail_search_simplify_prev_arg *arg)
 {
-	return mem_hash(arg, sizeof(*arg));
+	unsigned int hash;
+
+	hash = mem_hash(&arg->bin_mask, sizeof(arg->bin_mask));
+	if (arg->hdr_field_name_mask != NULL)
+		hash ^= str_hash(arg->hdr_field_name_mask);
+	if (arg->str_mask != NULL)
+		hash ^= str_hash(arg->str_mask);
+	return hash;
 }
 
 static void mail_search_arg_get_base_mask(const struct mail_search_arg *arg,
-					  struct mail_search_arg *mask_r)
+					  struct mail_search_simplify_prev_arg *mask_r)
 {
 	memset(mask_r, 0, sizeof(*mask_r));
-	mask_r->type = arg->type;
-	mask_r->match_not = arg->match_not;
-	mask_r->value.search_flags = arg->value.search_flags;
+	mask_r->bin_mask.type = arg->type;
+	mask_r->bin_mask.match_not = arg->match_not;
+	mask_r->bin_mask.fuzzy = arg->fuzzy;
+	mask_r->bin_mask.search_flags = arg->value.search_flags;
 }
 
 static struct mail_search_arg **
 mail_search_args_simplify_get_prev_argp(struct mail_search_simplify_ctx *ctx,
-					const struct mail_search_arg *mask)
+					const struct mail_search_simplify_prev_arg *mask)
 {
 	struct mail_search_simplify_prev_arg *prev_arg;
 
 	prev_arg = hash_table_lookup(ctx->prev_args, mask);
 	if (prev_arg == NULL) {
 		prev_arg = p_new(ctx->pool, struct mail_search_simplify_prev_arg, 1);
-		prev_arg->mask = *mask;
-		hash_table_insert(ctx->prev_args, &prev_arg->mask, prev_arg);
+		prev_arg->bin_mask = mask->bin_mask;
+		prev_arg->hdr_field_name_mask =
+			p_strdup(ctx->pool, mask->hdr_field_name_mask);
+		prev_arg->str_mask =
+			p_strdup(ctx->pool, mask->str_mask);
+		hash_table_insert(ctx->prev_args, prev_arg, prev_arg);
 	}
 	return &prev_arg->prev_arg;
 }
@@ -56,7 +86,7 @@
 static bool mail_search_args_merge_flags(struct mail_search_simplify_ctx *ctx,
 					 struct mail_search_arg *args)
 {
-	struct mail_search_arg mask;
+	struct mail_search_simplify_prev_arg mask;
 	struct mail_search_arg **prev_argp;
 
 	if (!((!args->match_not && ctx->parent_and) ||
@@ -78,7 +108,7 @@
 static bool mail_search_args_merge_set(struct mail_search_simplify_ctx *ctx,
 				       struct mail_search_arg *args)
 {
-	struct mail_search_arg mask;
+	struct mail_search_simplify_prev_arg mask;
 	struct mail_search_arg **prev_argp;
 
 	if (!((!args->match_not && ctx->parent_and) ||
@@ -101,11 +131,11 @@
 static bool mail_search_args_merge_time(struct mail_search_simplify_ctx *ctx,
 					struct mail_search_arg *args)
 {
-	struct mail_search_arg mask;
+	struct mail_search_simplify_prev_arg mask;
 	struct mail_search_arg **prev_argp, *prev_arg;
 
 	mail_search_arg_get_base_mask(args, &mask);
-	mask.value.date_type = args->value.date_type;
+	mask.bin_mask.date_type = args->value.date_type;
 	prev_argp = mail_search_args_simplify_get_prev_argp(ctx, &mask);
 
 	if (*prev_argp == NULL) {
@@ -162,7 +192,7 @@
 static bool mail_search_args_merge_size(struct mail_search_simplify_ctx *ctx,
 					struct mail_search_arg *args)
 {
-	struct mail_search_arg mask;
+	struct mail_search_simplify_prev_arg mask;
 	struct mail_search_arg **prev_argp, *prev_arg;
 
 	mail_search_arg_get_base_mask(args, &mask);
@@ -215,6 +245,25 @@
 	return FALSE;
 }
 
+static bool mail_search_args_merge_text(struct mail_search_simplify_ctx *ctx,
+					struct mail_search_arg *args)
+{
+	struct mail_search_simplify_prev_arg mask;
+	struct mail_search_arg **prev_argp;
+
+	mail_search_arg_get_base_mask(args, &mask);
+	mask.hdr_field_name_mask = args->hdr_field_name;
+	mask.str_mask = args->value.str;
+	prev_argp = mail_search_args_simplify_get_prev_argp(ctx, &mask);
+
+	if (*prev_argp == NULL) {
+		*prev_argp = args;
+		return FALSE;
+	}
+	/* duplicate search word. */
+	return TRUE;
+}
+
 static bool
 mail_search_args_simplify_sub(struct mailbox *box,
 			      struct mail_search_arg *args, bool parent_and)
@@ -226,8 +275,9 @@
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.parent_and = parent_and;
 	ctx.pool = pool_alloconly_create("mail search args simplify", 1024);
-	hash_table_create(&ctx.prev_args, ctx.pool, 0, mail_search_arg_hash,
-			  mail_search_arg_cmp);
+	hash_table_create(&ctx.prev_args, ctx.pool, 0,
+			  mail_search_simplify_prev_arg_hash,
+			  mail_search_simplify_prev_arg_cmp);
 
 	while (args != NULL) {
 		if (args->match_not && (args->type == SEARCH_SUB ||
@@ -284,6 +334,13 @@
 		case SEARCH_LARGER:
 			merged = mail_search_args_merge_size(&ctx, args);
 			break;
+		case SEARCH_HEADER:
+		case SEARCH_HEADER_ADDRESS:
+		case SEARCH_HEADER_COMPRESS_LWSP:
+		case SEARCH_BODY:
+		case SEARCH_TEXT:
+			merged = mail_search_args_merge_text(&ctx, args);
+			break;
 		default:
 			merged = FALSE;
 			break;
@@ -381,6 +438,6 @@
 		/* we may have added some extra SUBs that could be dropped */
 		mail_search_args_simplify_sub(args->box, args->args, TRUE);
 	}
-	if (removals)
-		mail_search_args_simplify_sub(args->box, args->args, TRUE);
+	while (removals)
+		removals = mail_search_args_simplify_sub(args->box, args->args, TRUE);
 }
diff -r b2c20d9ff296 -r c5751819176f src/lib-storage/test-mail-search-args-simplify.c
--- a/src/lib-storage/test-mail-search-args-simplify.c	Thu Apr 23 21:00:43 2015 +0300
+++ b/src/lib-storage/test-mail-search-args-simplify.c	Thu Apr 23 21:19:54 2015 +0300
@@ -79,7 +79,23 @@
 	{ "LARGER 3 LARGER 1 LARGER 2", "LARGER 3" },
 	{ "OR LARGER 1 LARGER 2", "LARGER 1" },
 	{ "OR LARGER 1 OR LARGER 3 LARGER 2", "LARGER 1" },
-	{ "LARGER 3 NOT LARGER 1 LARGER 2", "LARGER 3 NOT LARGER 1" }
+	{ "LARGER 3 NOT LARGER 1 LARGER 2", "LARGER 3 NOT LARGER 1" },
+
+	{ "SUBJECT foo SUBJECT foo", "SUBJECT foo" },
+	{ "SUBJECT foo SUBJECT foob", "SUBJECT foo SUBJECT foob" },
+	{ "OR SUBJECT foo SUBJECT foo", "SUBJECT foo" },
+	{ "FROM foo FROM foo", "FROM foo" },
+	{ "FROM foo FROM bar", "FROM foo FROM bar" },
+	{ "FROM foo TO foo", "FROM foo TO foo" },
+
+	{ "TEXT foo TEXT foo", "TEXT foo" },
+	{ "TEXT foo TEXT foob", "TEXT foo TEXT foob" },
+	{ "OR TEXT foo TEXT foo", "TEXT foo" },
+	{ "TEXT foo NOT TEXT foo TEXT foo NOT TEXT foo", "TEXT foo NOT TEXT foo" },
+	{ "BODY foo BODY foo", "BODY foo" },
+	{ "OR BODY foo BODY foo", "BODY foo" },
+	{ "TEXT foo BODY foo", "TEXT foo BODY foo" },
+	{ "OR ( TEXT foo OR TEXT foo TEXT foo ) ( TEXT foo ( TEXT foo ) )", "TEXT foo" },
 };
 
 static struct mail_search_args *


More information about the dovecot-cvs mailing list