dovecot-2.2: lib-fts: Added "lowercase" filter.
dovecot at dovecot.org
dovecot at dovecot.org
Sat May 9 11:28:36 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/75b4b312ea09
changeset: 18584:75b4b312ea09
user: Timo Sirainen <tss at iki.fi>
date: Sat May 09 14:26:42 2015 +0300
description:
lib-fts: Added "lowercase" filter.
For now it handles only ASCII characters, but that's enough for our use.
diffstat:
src/lib-fts/Makefile.am | 1 +
src/lib-fts/fts-filter-lowercase.c | 61 ++++++++++++++++++++++++++++++++++++++
src/lib-fts/fts-filter.c | 1 +
src/lib-fts/fts-filter.h | 4 ++
src/lib-fts/test-fts-filter.c | 30 ++++++++++++++++++
5 files changed, 97 insertions(+), 0 deletions(-)
diffs (148 lines):
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/Makefile.am
--- a/src/lib-fts/Makefile.am Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/Makefile.am Sat May 09 14:26:42 2015 +0300
@@ -61,6 +61,7 @@
libfts_la_SOURCES = \
fts-filter.c \
+ fts-filter-lowercase.c \
fts-filter-normalizer-icu.c \
fts-filter-stopwords.c \
fts-filter-stemmer-snowball.c \
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter-lowercase.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-filter-lowercase.c Sat May 09 14:26:42 2015 +0300
@@ -0,0 +1,61 @@
+/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "fts-filter.h"
+#include "fts-filter-private.h"
+#include "fts-language.h"
+
+/* Returns TRUE unconditionally; the language argument is ignored. */
+static bool
+fts_filter_lowercase_supports(const struct fts_language *lang ATTR_UNUSED)
+{
+	const bool supported = TRUE;
+
+	return supported;
+}
+
+/* Releases a filter instance by handing it to i_free(). */
+static void
+fts_filter_lowercase_destroy(struct fts_filter *filter)
+{
+	struct fts_filter *instance = filter;
+
+	i_free(instance);
+}
+
+/* Creates a new lowercase filter instance.
+
+   The filter takes no settings: if the settings list has a first entry, it
+   is reported through *error_r as unknown and -1 is returned. A NULL or
+   empty list is accepted. On success the new instance, initialized as a
+   copy of the struct that fts_filter_lowercase points to, is stored in
+   *filter_r and 0 is returned. */
+static int
+fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED,
+			    const char *const *settings,
+			    struct fts_filter **filter_r,
+			    const char **error_r)
+{
+	struct fts_filter *filter;
+
+	/* Tolerate a missing settings list instead of dereferencing NULL. */
+	if (settings != NULL && settings[0] != NULL) {
+		*error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
+		return -1;
+	}
+	filter = i_new(struct fts_filter, 1);
+	*filter = *fts_filter_lowercase;
+
+	*filter_r = filter;
+	return 0;
+}
+
+/* Replaces *token with the result of t_str_lcase() applied to it.
+   Always returns 1; error_r is never written. */
+static int
+fts_filter_lowercase_filter(struct fts_filter *_filter ATTR_UNUSED,
+			    const char **token,
+			    const char **error_r ATTR_UNUSED)
+{
+	const char *lowered = t_str_lcase(*token);
+
+	*token = lowered;
+	return 1;
+}
+
+/* Virtual function table for the lowercase filter. */
+static const struct fts_filter_vfuncs lowercase_filter_vfuncs = {
+	fts_filter_lowercase_supports,
+	fts_filter_lowercase_create,
+	fts_filter_lowercase_filter,
+	fts_filter_lowercase_destroy
+};
+
+/* Class definition; fts_filter_lowercase_create() copies this struct into
+   each new instance. */
+static const struct fts_filter fts_filter_lowercase_real = {
+	.class_name = LOWERCASE_FILTER_NAME,
+	.v = &lowercase_filter_vfuncs
+};
+
+/* Public handle for the lowercase filter class. */
+const struct fts_filter *fts_filter_lowercase = &fts_filter_lowercase_real;
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter.c
--- a/src/lib-fts/fts-filter.c Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/fts-filter.c Sat May 09 14:26:42 2015 +0300
@@ -15,6 +15,7 @@
fts_filter_register(fts_filter_stopwords);
fts_filter_register(fts_filter_stemmer_snowball);
fts_filter_register(fts_filter_normalizer_icu);
+ fts_filter_register(fts_filter_lowercase);
}
void fts_filters_deinit(void)
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/fts-filter.h
--- a/src/lib-fts/fts-filter.h Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/fts-filter.h Sat May 09 14:26:42 2015 +0300
@@ -33,6 +33,10 @@
extern const struct fts_filter *fts_filter_normalizer_icu;
#define ICU_NORMALIZER_FILTER_NAME "normalizer-icu"
+/* Lowercases the input. Currently only ASCII data is lowercased. */
+extern const struct fts_filter *fts_filter_lowercase;
+#define LOWERCASE_FILTER_NAME "lowercase"
+
/* Register all built-in filters. */
void fts_filters_init(void);
void fts_filters_deinit(void);
diff -r 1963690280b7 -r 75b4b312ea09 src/lib-fts/test-fts-filter.c
--- a/src/lib-fts/test-fts-filter.c Sat May 09 14:09:37 2015 +0300
+++ b/src/lib-fts/test-fts-filter.c Sat May 09 14:26:42 2015 +0300
@@ -11,6 +11,35 @@
static const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL};
static struct fts_language english_language = { .name = "en" };
+/* Runs a few ASCII tokens through the lowercase filter and checks that
+   each one comes back lowercased. */
+static void test_fts_filter_lowercase(void)
+{
+	struct {
+		const char *input;
+		const char *output;
+	} tests[] = {
+		{ "foo", "foo" },
+		{ "FOO", "foo" },
+		{ "fOo", "foo" }
+	};
+	const struct fts_filter *filter_class;
+	struct fts_filter *filter;
+	const char *error;
+	const char *token;
+	unsigned int i;
+
+	test_begin("fts filter lowercase");
+	filter_class = fts_filter_find(LOWERCASE_FILTER_NAME);
+	test_assert(fts_filter_create(filter_class, NULL, &english_language, NULL, &filter, &error) == 0);
+
+	for (i = 0; i < N_ELEMENTS(tests); i++) {
+		token = tests[i].input;
+		/* Pass the loop index (not a constant 0) so that a failure
+		   can be traced back to the offending test case. */
+		test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
+				strcmp(token, tests[i].output) == 0, i);
+	}
+	fts_filter_unref(&filter);
+	test_end();
+}
+
static void test_fts_filter_stopwords_eng(void)
{
const struct fts_filter *filter_class;
@@ -521,6 +550,7 @@
int main(void)
{
static void (*test_functions[])(void) = {
+ test_fts_filter_lowercase,
test_fts_filter_stopwords_eng,
test_fts_filter_stopwords_fin,
test_fts_filter_stopwords_fra,
More information about the dovecot-cvs
mailing list