dovecot-2.2: lib-fts: Added normalizer-simple for doing normalization without libicu.
dovecot at dovecot.org
dovecot at dovecot.org
Tue Apr 21 17:18:10 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/9aaacd260a7d
changeset: 18442:9aaacd260a7d
user: Timo Sirainen <tss at iki.fi>
date: Tue Apr 21 20:16:36 2015 +0300
description:
lib-fts: Added normalizer-simple for doing normalization without libicu.
diffstat:
src/lib-fts/Makefile.am | 9 +--
src/lib-fts/fts-filter-normalizer-simple.c | 77 ++++++++++++++++++++++++++++++
src/lib-fts/fts-filter.c | 1 +
src/lib-fts/fts-filter.h | 4 +
4 files changed, 85 insertions(+), 6 deletions(-)
diffs (133 lines):
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/Makefile.am
--- a/src/lib-fts/Makefile.am Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/Makefile.am Tue Apr 21 20:16:36 2015 +0300
@@ -61,6 +61,7 @@
libfts_la_SOURCES = \
fts-filter.c \
fts-filter-normalizer-icu.c \
+ fts-filter-normalizer-simple.c \
fts-filter-stopwords.c \
fts-filter-stemmer-snowball.c \
fts-language.c \
@@ -88,13 +89,9 @@
../lib/liblib.la
test_deps = $(noinst_LTLIBRARIES) $(test_libs)
-filter_deps = \
- fts-filter.lo fts-filter-stopwords.lo \
- fts-filter-stemmer-snowball.lo fts-filter-normalizer-icu.lo
-
test_fts_filter_SOURCES = test-fts-filter.c
-test_fts_filter_LDADD = $(filter_deps) $(test_libs) $(STEMMER_LIBS) $(TEXTCAT_LIBS) $(NORMALIZER_LIBS)
-test_fts_filter_DEPENDENCIES = $(test_deps) $(filter_deps)
+test_fts_filter_LDADD = libfts.la $(test_libs)
+test_fts_filter_DEPENDENCIES = libfts.la $(test_deps)
if BUILD_FTS_EXTTEXTCAT
TEST_FTS_LANGUAGE = test-fts-language
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter-normalizer-simple.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-filter-normalizer-simple.c Tue Apr 21 20:16:36 2015 +0300
@@ -0,0 +1,77 @@
+/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "unichar.h"
+#include "str.h"
+#include "fts-filter.h"
+#include "fts-filter-private.h"
+#include "fts-language.h"
+
+struct fts_filter_normalizer_simple {
+ struct fts_filter filter;
+ string_t *str;
+};
+
+static bool
+fts_filter_normalizer_simple_supports(const struct fts_language *lang ATTR_UNUSED)
+{
+ return TRUE;
+}
+
+static void
+fts_filter_normalizer_simple_destroy(struct fts_filter *_filter)
+{
+ struct fts_filter_normalizer_simple *filter =
+ (struct fts_filter_normalizer_simple *)_filter;
+
+ str_free(&filter->str);
+ i_free(filter);
+}
+
+static int
+fts_filter_normalizer_simple_create(const struct fts_language *lang ATTR_UNUSED,
+ const char *const *settings,
+ struct fts_filter **filter_r,
+ const char **error_r)
+{
+ struct fts_filter_normalizer_simple *filter;
+
+ if (settings[0] != NULL) {
+ *error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
+ return -1;
+ }
+ filter = i_new(struct fts_filter_normalizer_simple, 1);
+ filter->filter = *fts_filter_normalizer_simple;
+ filter->str = str_new(default_pool, 128);
+
+ *filter_r = &filter->filter;
+ return 0;
+}
+
+static const char *
+fts_filter_normalizer_simple_filter(struct fts_filter *_filter,
+ const char *token)
+{
+ struct fts_filter_normalizer_simple *filter =
+ (struct fts_filter_normalizer_simple *)_filter;
+
+ str_truncate(filter->str, 0);
+ if (uni_utf8_to_decomposed_titlecase(token, strlen(token),
+ filter->str) < 0)
+ return NULL;
+ return str_c(filter->str);
+}
+
+static const struct fts_filter_vfuncs normalizer_filter_vfuncs = {
+ fts_filter_normalizer_simple_supports,
+ fts_filter_normalizer_simple_create,
+ fts_filter_normalizer_simple_filter,
+ fts_filter_normalizer_simple_destroy
+};
+
+static const struct fts_filter fts_filter_normalizer_simple_real = {
+ .class_name = SIMPLE_NORMALIZER_FILTER_NAME,
+ .v = &normalizer_filter_vfuncs
+};
+
+const struct fts_filter *fts_filter_normalizer_simple = &fts_filter_normalizer_simple_real;
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter.c
--- a/src/lib-fts/fts-filter.c Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/fts-filter.c Tue Apr 21 20:16:36 2015 +0300
@@ -15,6 +15,7 @@
fts_filter_register(fts_filter_stopwords);
fts_filter_register(fts_filter_stemmer_snowball);
fts_filter_register(fts_filter_normalizer_icu);
+ fts_filter_register(fts_filter_normalizer_simple);
}
void fts_filters_deinit(void)
diff -r 782594d44002 -r 9aaacd260a7d src/lib-fts/fts-filter.h
--- a/src/lib-fts/fts-filter.h Tue Apr 21 19:40:42 2015 +0300
+++ b/src/lib-fts/fts-filter.h Tue Apr 21 20:16:36 2015 +0300
@@ -33,6 +33,10 @@
extern const struct fts_filter *fts_filter_normalizer_icu;
#define ICU_NORMALIZER_FILTER_NAME "normalizer-icu"
+/* Normalization using i;unicode-casemap (RFC 5051) */
+extern const struct fts_filter *fts_filter_normalizer_simple;
+#define SIMPLE_NORMALIZER_FILTER_NAME "normalizer-simple"
+
/* Register all built-in filters. */
void fts_filters_init(void);
void fts_filters_deinit(void);
More information about the dovecot-cvs mailing list