dovecot-2.2: lib-fts: Added "lowercase" filter.

dovecot at dovecot.org dovecot at dovecot.org
Sat May 9 11:28:36 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/75b4b312ea09
changeset: 18584:75b4b312ea09
user:      Timo Sirainen <tss at iki.fi>
date:      Sat May 09 14:26:42 2015 +0300
description:
lib-fts: Added "lowercase" filter.
For now it handles only ASCII characters, but that's enough for our use.

diffstat:

 src/lib-fts/Makefile.am            |   1 +
 src/lib-fts/fts-filter-lowercase.c |  61 ++++++++++++++++++++++++++++++++++++++
 src/lib-fts/fts-filter.c           |   1 +
 src/lib-fts/fts-filter.h           |   4 ++
 src/lib-fts/test-fts-filter.c      |  30 ++++++++++++++++++
 5 files changed, 97 insertions(+), 0 deletions(-)

diffs (148 lines):

diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/Makefile.am
--- a/src/lib-fts/Makefile.am	Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/Makefile.am	Sat May 09 14:26:42 2015 +0300
@@ -61,6 +61,7 @@
 
 libfts_la_SOURCES = \
 	fts-filter.c \
+	fts-filter-lowercase.c \
 	fts-filter-normalizer-icu.c \
 	fts-filter-stopwords.c \
 	fts-filter-stemmer-snowball.c \
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter-lowercase.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-filter-lowercase.c	Sat May 09 14:26:42 2015 +0300
@@ -0,0 +1,61 @@
+/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "fts-filter.h"
+#include "fts-filter-private.h"
+#include "fts-language.h"
+
+static bool
+fts_filter_lowercase_supports(const struct fts_language *lang ATTR_UNUSED)
+{
+	return TRUE;
+}
+
+static void
+fts_filter_lowercase_destroy(struct fts_filter *filter)
+{
+	i_free(filter);
+}
+
+static int
+fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED,
+			    const char *const *settings,
+			    struct fts_filter **filter_r,
+			    const char **error_r)
+{
+	struct fts_filter *filter;
+
+	if (settings[0] != NULL) {
+		*error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
+		return -1;
+	}
+	filter = i_new(struct fts_filter, 1);
+	*filter = *fts_filter_lowercase;
+
+	*filter_r = filter;
+	return 0;
+}
+
+static int
+fts_filter_lowercase_filter(struct fts_filter *_filter ATTR_UNUSED,
+			    const char **token,
+			    const char **error_r ATTR_UNUSED)
+{
+	*token = t_str_lcase(*token);
+	return 1;
+}
+
+static const struct fts_filter_vfuncs normalizer_filter_vfuncs = {
+	fts_filter_lowercase_supports,
+	fts_filter_lowercase_create,
+	fts_filter_lowercase_filter,
+	fts_filter_lowercase_destroy
+};
+
+static const struct fts_filter fts_filter_lowercase_real = {
+	.class_name = LOWERCASE_FILTER_NAME,
+	.v = &normalizer_filter_vfuncs
+};
+
+const struct fts_filter *fts_filter_lowercase = &fts_filter_lowercase_real;
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter.c
--- a/src/lib-fts/fts-filter.c	Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/fts-filter.c	Sat May 09 14:26:42 2015 +0300
@@ -15,6 +15,7 @@
 	fts_filter_register(fts_filter_stopwords);
 	fts_filter_register(fts_filter_stemmer_snowball);
 	fts_filter_register(fts_filter_normalizer_icu);
+	fts_filter_register(fts_filter_lowercase);
 }
 
 void fts_filters_deinit(void)
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter.h
--- a/src/lib-fts/fts-filter.h	Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/fts-filter.h	Sat May 09 14:26:42 2015 +0300
@@ -33,6 +33,10 @@
 extern const struct fts_filter *fts_filter_normalizer_icu;
 #define ICU_NORMALIZER_FILTER_NAME "normalizer-icu"
 
+/* Lowercases the input. Currently only ASCII data is lowercased. */
+extern const struct fts_filter *fts_filter_lowercase;
+#define LOWERCASE_FILTER_NAME "lowercase"
+
 /* Register all built-in filters. */
 void fts_filters_init(void);
 void fts_filters_deinit(void);
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/test-fts-filter.c
--- a/src/lib-fts/test-fts-filter.c	Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/test-fts-filter.c	Sat May 09 14:26:42 2015 +0300
@@ -11,6 +11,35 @@
 static const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL};
 static struct fts_language english_language = { .name = "en" };
 
+static void test_fts_filter_lowercase(void)
+{
+	struct {
+		const char *input;
+		const char *output;
+	} tests[] = {
+		{ "foo", "foo" },
+		{ "FOO", "foo" },
+		{ "fOo", "foo" }
+	};
+	const struct fts_filter *filter_class;
+	struct fts_filter *filter;
+	const char *error;
+	const char *token;
+	unsigned int i;
+
+	test_begin("fts filter lowercase");
+	filter_class = fts_filter_find(LOWERCASE_FILTER_NAME);
+	test_assert(fts_filter_create(filter_class, NULL, &english_language, NULL, &filter, &error) == 0);
+
+	for (i = 0; i < N_ELEMENTS(tests); i++) {
+		token = tests[i].input;
+		test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
+				strcmp(token, tests[i].output) == 0, 0);
+	}
+	fts_filter_unref(&filter);
+	test_end();
+}
+
 static void test_fts_filter_stopwords_eng(void)
 {
 	const struct fts_filter *filter_class;
@@ -521,6 +550,7 @@
 int main(void)
 {
 	static void (*test_functions[])(void) = {
+		test_fts_filter_lowercase,
 		test_fts_filter_stopwords_eng,
 		test_fts_filter_stopwords_fin,
 		test_fts_filter_stopwords_fra,


More information about the dovecot-cvs mailing list