dovecot-2.2: lib-fts: Changed fts_tokenizer_next/final() to return error string.
dovecot at dovecot.org
dovecot at dovecot.org
Sat May 9 16:28:07 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/7f151aca47ac
changeset: 18610:7f151aca47ac
user: Timo Sirainen <tss at iki.fi>
date: Sat May 09 19:26:01 2015 +0300
description:
lib-fts: Changed fts_tokenizer_next/final() to return error string.
The current tokenizers can't fail, but if we're doing tokenization via
external services they could fail.
diffstat:
src/lib-fts/fts-tokenizer-address.c | 3 +-
src/lib-fts/fts-tokenizer-generic.c | 11 ++++++---
src/lib-fts/fts-tokenizer-private.h | 3 +-
src/lib-fts/fts-tokenizer.c | 22 +++++++++++---------
src/lib-fts/fts-tokenizer.h | 5 ++-
src/lib-fts/test-fts-tokenizer.c | 40 ++++++++++++++++++------------------
src/plugins/fts/fts-build-mail.c | 2 +-
src/plugins/fts/fts-search-args.c | 18 +++++++++++-----
8 files changed, 59 insertions(+), 45 deletions(-)
diffs (truncated from 314 to 300 lines):
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c Sat May 09 19:26:01 2015 +0300
@@ -203,7 +203,8 @@
static int
fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
const unsigned char *data, size_t size,
- size_t *skip_r, const char **token_r)
+ size_t *skip_r, const char **token_r,
+ const char **error_r ATTR_UNUSED)
{
struct email_address_fts_tokenizer *tok =
(struct email_address_fts_tokenizer *)_tok;
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Sat May 09 19:26:01 2015 +0300
@@ -168,7 +168,8 @@
static int
fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
const unsigned char *data, size_t size,
- size_t *skip_r, const char **token_r)
+ size_t *skip_r, const char **token_r,
+ const char **error_r ATTR_UNUSED)
{
struct generic_fts_tokenizer *tok =
(struct generic_fts_tokenizer *)_tok;
@@ -565,8 +566,9 @@
static int
fts_tokenizer_generic_next_tr29(struct fts_tokenizer *_tok,
- const unsigned char *data, size_t size,
- size_t *skip_r, const char **token_r)
+ const unsigned char *data, size_t size,
+ size_t *skip_r, const char **token_r,
+ const char **error_r ATTR_UNUSED)
{
struct generic_fts_tokenizer *tok =
(struct generic_fts_tokenizer *)_tok;
@@ -614,7 +616,8 @@
const unsigned char *data ATTR_UNUSED,
size_t size ATTR_UNUSED,
size_t *skip_r ATTR_UNUSED,
- const char **token_r ATTR_UNUSED)
+ const char **token_r ATTR_UNUSED,
+ const char **error_r ATTR_UNUSED)
{
i_unreached();
}
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/fts-tokenizer-private.h
--- a/src/lib-fts/fts-tokenizer-private.h Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-private.h Sat May 09 19:26:01 2015 +0300
@@ -12,7 +12,8 @@
void (*reset)(struct fts_tokenizer *tok);
int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
- size_t size, size_t *skip_r, const char **token_r);
+ size_t size, size_t *skip_r, const char **token_r,
+ const char **error_r);
};
enum fts_tokenizer_parent_state {
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/fts-tokenizer.c
--- a/src/lib-fts/fts-tokenizer.c Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c Sat May 09 19:26:01 2015 +0300
@@ -123,7 +123,7 @@
static int
fts_tokenizer_next_self(struct fts_tokenizer *tok,
const unsigned char *data, size_t size,
- const char **token_r)
+ const char **token_r, const char **error_r)
{
int ret = 0;
size_t skip = 0;
@@ -133,12 +133,13 @@
if (tok->prev_reply_finished) {
/* whole new data */
- ret = tok->v->next(tok, data, size, &skip, token_r);
+ ret = tok->v->next(tok, data, size, &skip, token_r, error_r);
} else {
/* continuing previous data */
i_assert(tok->prev_skip <= size);
ret = tok->v->next(tok, data + tok->prev_skip,
- size - tok->prev_skip, &skip, token_r);
+ size - tok->prev_skip, &skip,
+ token_r, error_r);
}
if (ret > 0) {
@@ -164,13 +165,13 @@
int fts_tokenizer_next(struct fts_tokenizer *tok,
const unsigned char *data, size_t size,
- const char **token_r)
+ const char **token_r, const char **error_r)
{
int ret;
switch (tok->parent_state) {
case FTS_TOKENIZER_PARENT_STATE_ADD_DATA:
- ret = fts_tokenizer_next_self(tok, data, size, token_r);
+ ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r);
if (ret <= 0 || tok->parent == NULL || tok->skip_parents)
return ret;
buffer_set_used_size(tok->parent_input, 0);
@@ -179,25 +180,26 @@
/* fall through */
case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
- tok->parent_input->used, token_r);
+ tok->parent_input->used, token_r, error_r);
if (ret != 0)
return ret;
tok->parent_state++;
/* fall through */
case FTS_TOKENIZER_PARENT_STATE_FINALIZE:
- ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r);
+ ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r, error_r);
if (ret != 0)
return ret;
/* we're finished sending this token to parent tokenizer.
see if our own tokenizer has more tokens available */
tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
- return fts_tokenizer_next(tok, data, size, token_r);
+ return fts_tokenizer_next(tok, data, size, token_r, error_r);
default:
i_unreached();
}
}
-int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r)
+int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
+ const char **error_r)
{
- return fts_tokenizer_next(tok, NULL, 0, token_r);
+ return fts_tokenizer_next(tok, NULL, 0, token_r, error_r);
}
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/fts-tokenizer.h
--- a/src/lib-fts/fts-tokenizer.h Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h Sat May 09 19:26:01 2015 +0300
@@ -77,9 +77,10 @@
int fts_tokenizer_next(struct fts_tokenizer *tok,
const unsigned char *data, size_t size,
- const char **token_r);
+ const char **token_r, const char **error_r);
/* Returns same as fts_tokenizer_next(). */
-int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r);
+int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
+ const char **error_r);
const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
diff -r fa55a06ffae2 -r 7f151aca47ac src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c Sat May 09 19:21:45 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c Sat May 09 19:26:01 2015 +0300
@@ -38,16 +38,16 @@
const char *const *expected_output)
{
const unsigned char *input = (const unsigned char *)_input;
- const char *token;
+ const char *token, *error;
unsigned int i, max, outi, char_len, input_len = strlen(_input);
/* test all input at once */
outi = 0;
- while (fts_tokenizer_next(tok, input, input_len, &token) > 0) {
+ while (fts_tokenizer_next(tok, input, input_len, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
- while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
+ while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
@@ -56,12 +56,12 @@
/* test input one byte at a time */
for (i = outi = 0; i < input_len; i += char_len) {
char_len = uni_utf8_char_bytes(input[i]);
- while (fts_tokenizer_next(tok, input+i, char_len, &token) > 0) {
+ while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
}
- while (fts_tokenizer_final(tok, &token) > 0) {
+ while (fts_tokenizer_final(tok, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
@@ -72,12 +72,12 @@
max = rand() % (input_len - i) + 1;
for (char_len = 0; char_len < max; )
char_len += uni_utf8_char_bytes(input[i+char_len]);
- while (fts_tokenizer_next(tok, input+i, char_len, &token) > 0) {
+ while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
}
- while (fts_tokenizer_final(tok, &token) > 0) {
+ while (fts_tokenizer_final(tok, &token, &error) > 0) {
test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
outi++;
}
@@ -257,28 +257,28 @@
test_tokenizer_inputoutput(tok, input, expected_output);
/* make sure state is forgotten at EOF */
- test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token) == 0);
- test_assert(fts_tokenizer_final(tok, &token) > 0 &&
+ test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
strcmp(token, "foo") == 0);
- test_assert(fts_tokenizer_final(tok, &token) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
- test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token) == 0);
- test_assert(fts_tokenizer_final(tok, &token) > 0 &&
+ test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
strcmp(token, "bar@baz") == 0);
- test_assert(fts_tokenizer_final(tok, &token) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
- test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token) == 0);
- test_assert(fts_tokenizer_final(tok, &token) > 0 &&
+ test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
strcmp(token, "foo") == 0);
- test_assert(fts_tokenizer_final(tok, &token) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
/* test reset explicitly */
- test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token) == 0);
+ test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0);
fts_tokenizer_reset(tok);
- test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token) == 0);
- test_assert(fts_tokenizer_final(tok, &token) > 0 &&
+ test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
strcmp(token, "b@c") == 0);
- test_assert(fts_tokenizer_final(tok, &token) == 0);
+ test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
fts_tokenizer_unref(&tok);
fts_tokenizer_unref(&gen_tok);
diff -r fa55a06ffae2 -r 7f151aca47ac src/plugins/fts/fts-build-mail.c
--- a/src/plugins/fts/fts-build-mail.c Sat May 09 19:21:45 2015 +0300
+++ b/src/plugins/fts/fts-build-mail.c Sat May 09 19:26:01 2015 +0300
@@ -261,7 +261,7 @@
int ret;
tokenizer = fts_user_get_index_tokenizer(ctx->update_ctx->backend->ns->user);
- while ((ret = fts_tokenizer_next(tokenizer, data, size, &token)) > 0) {
+ while ((ret = fts_tokenizer_next(tokenizer, data, size, &token, &error)) > 0) {
if (filter != NULL) {
ret = fts_filter_filter(filter, &token, &error);
if (ret == 0)
diff -r fa55a06ffae2 -r 7f151aca47ac src/plugins/fts/fts-search-args.c
--- a/src/plugins/fts/fts-search-args.c Sat May 09 19:21:45 2015 +0300
+++ b/src/plugins/fts/fts-search-args.c Sat May 09 19:26:01 2015 +0300
@@ -81,7 +81,7 @@
token2 = t_strdup(token2);
array_append(&tokens, &token2, 1);
} else if (ret < 0) {
- i_error("fts: Couldn't create search tokens: %s", error);
+ i_error("fts: Couldn't filter search tokens: %s", error);
return -1;
}
}
@@ -99,9 +99,10 @@
{
const ARRAY_TYPE(fts_user_language) *languages;
struct mail_search_arg *and_arg, *orig_arg = *argp;
- const char *token, *orig_token = orig_arg->value.str;
+ const char *error, *token, *orig_token = orig_arg->value.str;
unsigned int orig_token_len = strlen(orig_token);
struct fts_tokenizer *tokenizer;
+ int ret;
languages = fts_user_get_all_languages(backend->ns->user);
tokenizer = fts_user_get_search_tokenizer(backend->ns->user);
@@ -117,20 +118,25 @@
/* reset tokenizer between search args in case there's any state left
from some previous failure */
fts_tokenizer_reset(tokenizer);
- while (fts_tokenizer_next(tokenizer,
- (const void *)orig_token,
- orig_token_len, &token) > 0) {
+ while ((ret = fts_tokenizer_next(tokenizer,
+ (const void *)orig_token,
+ orig_token_len, &token, &error)) > 0) {
if (fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
orig_arg, orig_token,
token) < 0)
return -1;
}
- while (fts_tokenizer_final(tokenizer, &token) > 0) {
More information about the dovecot-cvs
mailing list