dovecot-2.2: fts: Change tokenizer API to be able to return errors
dovecot at dovecot.org
dovecot at dovecot.org
Sat May 9 08:32:19 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/7fe766887394
changeset: 18551:7fe766887394
user: Teemu Huovila <teemu.huovila at dovecot.fi>
date: Sat May 09 11:05:04 2015 +0300
description:
fts: Change tokenizer API to be able to return errors
Modify fts_tokenizer_next() to return an integer status code: 1 if a
token was returned in *token_r, 0 if more input is needed, and -1 on
error.
diffstat:
src/lib-fts/fts-tokenizer-address.c | 36 +++++++------
src/lib-fts/fts-tokenizer-generic.c | 48 +++++++++---------
src/lib-fts/fts-tokenizer-private.h | 5 +-
src/lib-fts/fts-tokenizer.c | 49 +++++++++---------
src/lib-fts/fts-tokenizer.h | 19 +++++--
src/lib-fts/test-fts-tokenizer.c | 94 ++++++++++++++++++++----------------
src/plugins/fts/fts-build-mail.c | 5 +-
src/plugins/fts/fts-search-args.c | 12 ++--
8 files changed, 144 insertions(+), 124 deletions(-)
diffs (truncated from 682 to 300 lines):
diff -r cebe8be92034 -r 7fe766887394 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c Sat May 09 11:05:04 2015 +0300
@@ -122,25 +122,27 @@
i_free(tok);
}
-static const char *
-fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok)
+static int
+fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok,
+ const char **token_r)
{
tok->tokenizer.skip_parents = TRUE;
tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
- return t_strdup(str_c(tok->last_word));
+ *token_r = t_strdup(str_c(tok->last_word));
+ return 1;
}
-static const char *
-fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok)
+static int
+fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok,
+ const char **token_r)
{
- const char *ret;
/* TODO: search option removes address from data here. */
if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
i_debug("Would remove current token");
- ret = t_strdup(str_c(tok->parent_data));
+ *token_r = t_strdup(str_c(tok->parent_data));
str_truncate(tok->parent_data, 0);
- return ret;
+ return 1;
}
/* Used to rewind past characters that can not be the start of a new localpart.
@@ -256,10 +258,10 @@
if (!tok->no_parent)
str_append_n(tok->parent_data, data, size);
}
-static const char *
+static int
fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
- const unsigned char *data, size_t size,
- size_t *skip_r)
+ const unsigned char *data, size_t size,
+ size_t *skip_r, const char **token_r)
{
struct email_address_fts_tokenizer *tok =
(struct email_address_fts_tokenizer *)_tok;
@@ -270,18 +272,18 @@
if (tok->state == EMAIL_ADDRESS_PARSER_STATE_COMPLETE) {
*skip_r = pos;
- return fts_tokenizer_address_current_token(tok);
+ return fts_tokenizer_address_current_token(tok, token_r);
}
/* end of data, output lingering tokens. first the parents data, then
possibly our token, if complete enough */
if (size == 0) {
if (!tok->no_parent && str_len(tok->parent_data) > 0)
- return fts_tokenizer_address_parent_data(tok);
+ return fts_tokenizer_address_parent_data(tok, token_r);
if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN
&& chars_after_at(tok) > 0)
- return fts_tokenizer_address_current_token(tok);
+ return fts_tokenizer_address_current_token(tok, token_r);
}
/* 1) regular input data OR
@@ -332,9 +334,9 @@
fts_tokenizer_address_update_parent(tok, data+pos,
local_skip);
if (!tok->no_parent)
- return fts_tokenizer_address_parent_data(tok);
+ return fts_tokenizer_address_parent_data(tok, token_r);
else {
- return fts_tokenizer_address_current_token(tok);
+ return fts_tokenizer_address_current_token(tok, token_r);
}
default:
i_unreached();
@@ -342,7 +344,7 @@
}
*skip_r = pos;
- return NULL;
+ return 0;
}
static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
diff -r cebe8be92034 -r 7fe766887394 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Sat May 09 11:05:04 2015 +0300
@@ -82,14 +82,13 @@
i_free(tok);
}
-static const char *
-fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok)
+static int
+fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
+ const char **token_r)
{
- const char *ret;
-
- ret = t_strndup(tok->token->data, tok->token->used);
+ *token_r = t_strndup(tok->token->data, tok->token->used);
buffer_set_used_size(tok->token, 0);
- return ret;
+ return 1;
}
/* TODO: This is duplicated from unichar.c */
@@ -135,10 +134,10 @@
return is_word_break(c);
}
-static const char *
+static int
fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
- const unsigned char *data, size_t size,
- size_t *skip_r)
+ const unsigned char *data, size_t size,
+ size_t *skip_r, const char **token_r)
{
struct generic_fts_tokenizer *tok =
(struct generic_fts_tokenizer *)_tok;
@@ -157,7 +156,7 @@
}
/* word boundary found - return a new token */
*skip_r = i + 1;
- return fts_tokenizer_generic_simple_current_token(tok);
+ return fts_tokenizer_generic_simple_current_token(tok, token_r);
}
}
/* word boundary not found yet */
@@ -168,9 +167,9 @@
if (size == 0 && tok->token->used > 0) {
/* return the last token */
- return fts_tokenizer_generic_simple_current_token(tok);
+ return fts_tokenizer_generic_simple_current_token(tok, token_r);
}
- return NULL;
+ return 0;
}
/* TODO: Arrange array searches roughly in order of likelyhood of a match.
@@ -464,20 +463,20 @@
return FALSE;
}
-static const char *
-fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok)
+static int
+fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
+ const char **token_r)
{
- const char *ret;
size_t end_skip = 0;
if (is_one_past_end(tok))
end_skip = tok->last_size;
- ret = t_strndup(tok->token->data, tok->token->used - end_skip);
+ *token_r = t_strndup(tok->token->data, tok->token->used - end_skip);
buffer_set_used_size(tok->token, 0);
tok->prev_prev_letter = LETTER_TYPE_NONE;
tok->prev_letter = LETTER_TYPE_NONE;
- return ret;
+ return 1;
}
/*
Find word boundaries in input text. Based on Unicode standard annex
@@ -516,10 +515,10 @@
return FALSE;
}
-static const char *
+static int
fts_tokenizer_generic_next_tr29(struct fts_tokenizer *_tok,
const unsigned char *data, size_t size,
- size_t *skip_r)
+ size_t *skip_r, const char **token_r)
{
struct generic_fts_tokenizer *tok =
(struct generic_fts_tokenizer *)_tok;
@@ -547,7 +546,7 @@
buffer_append(tok->token, data + start_skip,
len - start_skip);
*skip_r = i + 1;
- return fts_tokenizer_generic_tr29_current_token(tok);
+ return fts_tokenizer_generic_tr29_current_token(tok, token_r);
}
}
len = I_MIN(i, tok->max_length);
@@ -558,16 +557,17 @@
if (size == 0 && tok->token->used > 0) {
/* return the last token */
*skip_r = 0;
- return fts_tokenizer_generic_tr29_current_token(tok);
+ return fts_tokenizer_generic_tr29_current_token(tok, token_r);
}
- return NULL;
+ return 0;
}
-static const char *
+static int
fts_tokenizer_generic_next(struct fts_tokenizer *_tok ATTR_UNUSED,
const unsigned char *data ATTR_UNUSED,
size_t size ATTR_UNUSED,
- size_t *skip_r ATTR_UNUSED)
+ size_t *skip_r ATTR_UNUSED,
+ const char **token_r ATTR_UNUSED)
{
i_unreached();
}
diff -r cebe8be92034 -r 7fe766887394 src/lib-fts/fts-tokenizer-private.h
--- a/src/lib-fts/fts-tokenizer-private.h Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-private.h Sat May 09 11:05:04 2015 +0300
@@ -10,9 +10,8 @@
struct fts_tokenizer **tokenizer_r, const char **error_r);
void (*destroy)(struct fts_tokenizer *tok);
- const char *(*next)(struct fts_tokenizer *tok,
- const unsigned char *data, size_t size,
- size_t *skip_r);
+ int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
+ size_t size, size_t *skip_r, const char **token_r);
};
enum fts_tokenizer_parent_state {
diff -r cebe8be92034 -r 7fe766887394 src/lib-fts/fts-tokenizer.c
--- a/src/lib-fts/fts-tokenizer.c Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c Sat May 09 11:05:04 2015 +0300
@@ -120,11 +120,12 @@
tok->v->destroy(tok);
}
-static const char *
+static int
fts_tokenizer_next_self(struct fts_tokenizer *tok,
- const unsigned char *data, size_t size)
+ const unsigned char *data, size_t size,
+ const char **token_r)
{
- const char *token;
+ int ret = 0;
size_t skip = 0;
i_assert(tok->prev_reply_finished ||
@@ -132,60 +133,60 @@
if (tok->prev_reply_finished) {
/* whole new data */
- token = tok->v->next(tok, data, size, &skip);
+ ret = tok->v->next(tok, data, size, &skip, token_r);
} else {
/* continuing previous data */
i_assert(tok->prev_skip <= size);
- token = tok->v->next(tok, data + tok->prev_skip,
- size - tok->prev_skip, &skip);
+ ret = tok->v->next(tok, data + tok->prev_skip,
+ size - tok->prev_skip, &skip, token_r);
}
- if (token != NULL) {
+ if (ret > 0) {
i_assert(skip <= size - tok->prev_skip);
tok->prev_data = data;
tok->prev_size = size;
tok->prev_skip = tok->prev_skip + skip;
tok->prev_reply_finished = FALSE;
- } else {
+ } else if (ret == 0) {
/* we need a new data block */
tok->prev_data = NULL;
tok->prev_size = 0;
tok->prev_skip = 0;
tok->prev_reply_finished = TRUE;
}
- return token;
+ return ret;
}
-const char *
+int
fts_tokenizer_next(struct fts_tokenizer *tok,
- const unsigned char *data, size_t size)
+ const unsigned char *data, size_t size, const char **token_r)
{
- const char *token;
+ int ret;
More information about the dovecot-cvs mailing list