dovecot-2.2: lib-fts: Added normalizer-simple for doing normalization without libicu.

dovecot at dovecot.org dovecot at dovecot.org
Tue Apr 21 17:18:10 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/9aaacd260a7d
changeset: 18442:9aaacd260a7d
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Apr 21 20:16:36 2015 +0300
description:
lib-fts: Added normalizer-simple for doing normalization without libicu.

diffstat:

 src/lib-fts/Makefile.am                    |   9 +--
 src/lib-fts/fts-filter-normalizer-simple.c |  77 ++++++++++++++++++++++++++++++
 src/lib-fts/fts-filter.c                   |   1 +
 src/lib-fts/fts-filter.h                   |   4 +
 4 files changed, 85 insertions(+), 6 deletions(-)

diffs (133 lines):

diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/Makefile.am
--- a/src/lib-fts/Makefile.am	Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/Makefile.am	Tue Apr 21 20:16:36 2015 +0300
@@ -61,6 +61,7 @@
 libfts_la_SOURCES = \
 	fts-filter.c \
 	fts-filter-normalizer-icu.c \
+	fts-filter-normalizer-simple.c \
 	fts-filter-stopwords.c \
 	fts-filter-stemmer-snowball.c \
 	fts-language.c \
@@ -88,13 +89,9 @@
 	../lib/liblib.la
 test_deps = $(noinst_LTLIBRARIES) $(test_libs)
 
-filter_deps = \
-	fts-filter.lo fts-filter-stopwords.lo \
-	fts-filter-stemmer-snowball.lo fts-filter-normalizer-icu.lo
-
 test_fts_filter_SOURCES = test-fts-filter.c
-test_fts_filter_LDADD = $(filter_deps) $(test_libs) $(STEMMER_LIBS) $(TEXTCAT_LIBS) $(NORMALIZER_LIBS)
-test_fts_filter_DEPENDENCIES = $(test_deps) $(filter_deps)
+test_fts_filter_LDADD = libfts.la $(test_libs)
+test_fts_filter_DEPENDENCIES = libfts.la $(test_deps)
 
 if BUILD_FTS_EXTTEXTCAT
 TEST_FTS_LANGUAGE = test-fts-language
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter-normalizer-simple.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-filter-normalizer-simple.c	Tue Apr 21 20:16:36 2015 +0300
@@ -0,0 +1,77 @@
+/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "unichar.h"
+#include "str.h"
+#include "fts-filter.h"
+#include "fts-filter-private.h"
+#include "fts-language.h"
+
+struct fts_filter_normalizer_simple {
+	struct fts_filter filter;
+	string_t *str;
+};
+
+static bool
+fts_filter_normalizer_simple_supports(const struct fts_language *lang ATTR_UNUSED)
+{
+	return TRUE;
+}
+
+static void
+fts_filter_normalizer_simple_destroy(struct fts_filter *_filter)
+{
+	struct fts_filter_normalizer_simple *filter =
+		(struct fts_filter_normalizer_simple *)_filter;
+
+	str_free(&filter->str);
+	i_free(filter);
+}
+
+static int
+fts_filter_normalizer_simple_create(const struct fts_language *lang ATTR_UNUSED,
+				    const char *const *settings,
+				    struct fts_filter **filter_r,
+				    const char **error_r)
+{
+	struct fts_filter_normalizer_simple *filter;
+
+	if (settings[0] != NULL) {
+		*error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
+		return -1;
+	}
+	filter = i_new(struct fts_filter_normalizer_simple, 1);
+	filter->filter = *fts_filter_normalizer_simple;
+	filter->str = str_new(default_pool, 128);
+
+	*filter_r = &filter->filter;
+	return 0;
+}
+
+static const char *
+fts_filter_normalizer_simple_filter(struct fts_filter *_filter,
+				    const char *token)
+{
+	struct fts_filter_normalizer_simple *filter =
+		(struct fts_filter_normalizer_simple *)_filter;
+
+	str_truncate(filter->str, 0);
+	if (uni_utf8_to_decomposed_titlecase(token, strlen(token),
+					     filter->str) < 0)
+		return NULL;
+	return str_c(filter->str);
+}
+
+static const struct fts_filter_vfuncs normalizer_filter_vfuncs = {
+	fts_filter_normalizer_simple_supports,
+	fts_filter_normalizer_simple_create,
+	fts_filter_normalizer_simple_filter,
+	fts_filter_normalizer_simple_destroy
+};
+
+static const struct fts_filter fts_filter_normalizer_simple_real = {
+	.class_name = SIMPLE_NORMALIZER_FILTER_NAME,
+	.v = &normalizer_filter_vfuncs
+};
+
+const struct fts_filter *fts_filter_normalizer_simple = &fts_filter_normalizer_simple_real;
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter.c
--- a/src/lib-fts/fts-filter.c	Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/fts-filter.c	Tue Apr 21 20:16:36 2015 +0300
@@ -15,6 +15,7 @@
 	fts_filter_register(fts_filter_stopwords);
 	fts_filter_register(fts_filter_stemmer_snowball);
 	fts_filter_register(fts_filter_normalizer_icu);
+	fts_filter_register(fts_filter_normalizer_simple);
 }
 
 void fts_filters_deinit(void)
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter.h
--- a/src/lib-fts/fts-filter.h	Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/fts-filter.h	Tue Apr 21 20:16:36 2015 +0300
@@ -33,6 +33,10 @@
 extern const struct fts_filter *fts_filter_normalizer_icu;
 #define ICU_NORMALIZER_FILTER_NAME "normalizer-icu"
 
+/* Normalization using i;unicode-casemap (RFC 5051) */
+extern const struct fts_filter *fts_filter_normalizer_simple;
+#define SIMPLE_NORMALIZER_FILTER_NAME "normalizer-simple"
+
 /* Register all built-in filters. */
 void fts_filters_init(void);
 void fts_filters_deinit(void);


More information about the dovecot-cvs mailing list