dovecot-2.2: lib-fts: Added fts_tokenizer_reset()
dovecot at dovecot.org
dovecot at dovecot.org
Sat May 9 10:54:31 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/2dca6925bd88
changeset: 18580:2dca6925bd88
user: Timo Sirainen <tss at iki.fi>
date: Sat May 09 13:52:37 2015 +0300
description:
lib-fts: Added fts_tokenizer_reset()
diffstat:
src/lib-fts/fts-tokenizer-address.c | 11 +++++++++++
src/lib-fts/fts-tokenizer-generic.c | 14 ++++++++++++++
src/lib-fts/fts-tokenizer-private.h | 1 +
src/lib-fts/fts-tokenizer.c | 5 +++++
src/lib-fts/fts-tokenizer.h | 4 ++++
src/lib-fts/test-fts-tokenizer.c | 19 ++++++++++++++++---
6 files changed, 51 insertions(+), 3 deletions(-)
diffs (153 lines):
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c Sat May 09 13:52:37 2015 +0300
@@ -197,6 +197,16 @@
str_append_n(tok->parent_data, data, size);
}
+static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok)
+{
+ struct email_address_fts_tokenizer *tok =
+ (struct email_address_fts_tokenizer *)_tok;
+
+ tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
+ str_truncate(tok->last_word, 0);
+ str_truncate(tok->parent_data, 0);
+}
+
static int
fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
const unsigned char *data, size_t size,
@@ -290,6 +300,7 @@
static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
fts_tokenizer_email_address_create,
fts_tokenizer_email_address_destroy,
+ fts_tokenizer_email_address_reset,
fts_tokenizer_email_address_next
};
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Sat May 09 13:52:37 2015 +0300
@@ -137,6 +137,17 @@
return is_word_break(c);
}
+static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)
+{
+ struct generic_fts_tokenizer *tok =
+ (struct generic_fts_tokenizer *)_tok;
+
+ tok->prev_letter = LETTER_TYPE_NONE;
+ tok->prev_prev_letter = LETTER_TYPE_NONE;
+ tok->last_size = 0;
+ buffer_set_used_size(tok->token, 0);
+}
+
static int
fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
const unsigned char *data, size_t size,
@@ -580,6 +591,7 @@
static const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs = {
fts_tokenizer_generic_create,
fts_tokenizer_generic_destroy,
+ fts_tokenizer_generic_reset,
fts_tokenizer_generic_next
};
@@ -592,10 +604,12 @@
const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = {
fts_tokenizer_generic_create,
fts_tokenizer_generic_destroy,
+ fts_tokenizer_generic_reset,
fts_tokenizer_generic_next_simple
};
const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = {
fts_tokenizer_generic_create,
fts_tokenizer_generic_destroy,
+ fts_tokenizer_generic_reset,
fts_tokenizer_generic_next_tr29
};
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-private.h
--- a/src/lib-fts/fts-tokenizer-private.h Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-private.h Sat May 09 13:52:37 2015 +0300
@@ -10,6 +10,7 @@
struct fts_tokenizer **tokenizer_r, const char **error_r);
void (*destroy)(struct fts_tokenizer *tok);
+ void (*reset)(struct fts_tokenizer *tok);
int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
size_t size, size_t *skip_r, const char **token_r);
};
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer.c
--- a/src/lib-fts/fts-tokenizer.c Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c Sat May 09 13:52:37 2015 +0300
@@ -157,6 +157,11 @@
return ret;
}
+void fts_tokenizer_reset(struct fts_tokenizer *tok)
+{
+ tok->v->reset(tok);
+}
+
int
fts_tokenizer_next(struct fts_tokenizer *tok,
const unsigned char *data, size_t size, const char **token_r)
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer.h
--- a/src/lib-fts/fts-tokenizer.h Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h Sat May 09 13:52:37 2015 +0300
@@ -63,6 +63,9 @@
void fts_tokenizer_ref(struct fts_tokenizer *tok);
void fts_tokenizer_unref(struct fts_tokenizer **tok);
+/* Reset FTS tokenizer state */
+void fts_tokenizer_reset(struct fts_tokenizer *tok);
+
/*
Returns 1 if token was returned, 0 if input was non-blocking and
more data is needed, -1 if EOF/error.
@@ -82,4 +85,5 @@
const unsigned char *data, size_t size, const char **token_r);
const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
+
#endif
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c Sat May 09 13:52:37 2015 +0300
@@ -555,18 +555,31 @@
}
test_assert(*eopp == NULL);
+ /* make sure state is forgotten at EOF */
test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token) == 0);
- test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+ test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+ strcmp(token, "foo") == 0);
test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token) == 0);
- test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+ test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+ strcmp(token, "bar@baz") == 0);
test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token) == 0);
- test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+ test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+ strcmp(token, "foo") == 0);
test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
+ /* test reset explicitly */
+ test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token) == 0);
+ fts_tokenizer_reset(tok);
+ test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token) == 0);
+ test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+ strcmp(token, "b@c") == 0);
+ test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
+
+
fts_tokenizer_unref(&tok);
fts_tokenizer_unref(&gen_tok);
fts_tokenizers_deinit();
More information about the dovecot-cvs
mailing list