dovecot-2.2: lib-fts: Implemented "search" parameter to fts-tokenizer-address.
dovecot at dovecot.org
dovecot at dovecot.org
Sat May 9 10:17:37 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/60f07e741c57
changeset: 18573:60f07e741c57
user: Timo Sirainen <tss at iki.fi>
date: Sat May 09 13:15:09 2015 +0300
description:
lib-fts: Implemented "search" parameter to fts-tokenizer-address.
diffstat:
src/lib-fts/fts-tokenizer-address.c | 41 ++++++++++++++++++++--------------
src/lib-fts/test-fts-tokenizer.c | 43 +++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 17 deletions(-)
diffs (131 lines):
diff -r 07597666aa29 -r 60f07e741c57 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c Sat May 09 13:01:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c Sat May 09 13:15:09 2015 +0300
@@ -76,17 +76,30 @@
return 1;
}
-static int
+static bool
fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok,
const char **token_r)
{
- /* TODO: search option removes address from data here. */
- if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
- i_debug("Would remove current token");
+ if (tok->tokenizer.parent == NULL || str_len(tok->parent_data) == 0)
+ return FALSE;
+
+ if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN) {
+ /* we're searching and we want to find only the full
+ user@domain (not "user" and "domain"). we'll do this by
+ not feeding the last user@domain to parent tokenizer. */
+ unsigned int parent_prefix_len =
+ str_len(tok->parent_data) - str_len(tok->last_word);
+ i_assert(str_len(tok->parent_data) >= str_len(tok->last_word) &&
+ strcmp(str_c(tok->parent_data) + parent_prefix_len,
+ str_c(tok->last_word)) == 0);
+ str_truncate(tok->parent_data, parent_prefix_len);
+ if (str_len(tok->parent_data) == 0)
+ return FALSE;
+ }
*token_r = t_strdup(str_c(tok->parent_data));
str_truncate(tok->parent_data, 0);
- return 1;
+ return TRUE;
}
/* Used to rewind past characters that can not be the start of a new localpart.
@@ -204,8 +217,8 @@
/* end of data, output lingering tokens. first the parents data, then
possibly our token, if complete enough */
if (size == 0) {
- if (tok->tokenizer.parent != NULL && str_len(tok->parent_data) > 0)
- return fts_tokenizer_address_parent_data(tok, token_r);
+ if (fts_tokenizer_address_parent_data(tok, token_r))
+ return 1;
if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN &&
!domain_is_empty(tok))
@@ -254,16 +267,10 @@
break;
case EMAIL_ADDRESS_PARSER_STATE_COMPLETE:
- /* skip tailing non-atext */
- local_skip = skip_nonlocal_part(data+pos, size - pos);
- *skip_r = pos + local_skip;
- fts_tokenizer_address_update_parent(tok, data+pos,
- local_skip);
- if (tok->tokenizer.parent != NULL)
- return fts_tokenizer_address_parent_data(tok, token_r);
- else {
- return fts_tokenizer_address_current_token(tok, token_r);
- }
+ *skip_r = pos;
+ if (fts_tokenizer_address_parent_data(tok, token_r))
+ return 1;
+ return fts_tokenizer_address_current_token(tok, token_r);
default:
i_unreached();
}
diff -r 07597666aa29 -r 60f07e741c57 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c Sat May 09 13:01:45 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c Sat May 09 13:15:09 2015 +0300
@@ -518,6 +518,48 @@
test_end();
}
+static void test_fts_tokenizer_address_search(void)
+{
+ static const unsigned char input[] =
+ "@invalid invalid@ abc@example.com, "
+ "Bar Baz <bar@example.org>, "
+ "foo@domain";
+ static const char *const expected_output[] = {
+ "invalid", "invalid", "abc@example.com", "Bar", "Baz",
+ "bar@example.org", "foo@domain", NULL
+ };
+ static const char *const settings[] = { "search", "" };
+ struct fts_tokenizer *tok, *gen_tok;
+ const char * const *eopp = expected_output;
+ const char *token, *error;
+ unsigned int i;
+ int ret;
+
+ test_begin("fts tokenizer search email address + parent, input one character at a time");
+ fts_tokenizers_init();
+
+ test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
+ test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
+
+ for (i = 0; i <= sizeof(input)-1; ) {
+ ret = i < sizeof(input)-1 ?
+ fts_tokenizer_next(tok, &input[i], 1, &token) :
+ fts_tokenizer_next(tok, NULL, 0, &token);
+ if (ret == 0) {
+ i++;
+ continue;
+ }
+ test_assert(*eopp != NULL);
+ test_assert(null_strcmp(token, *eopp) == 0);
+ eopp++;
+ }
+ test_assert(*eopp == NULL);
+ fts_tokenizer_unref(&tok);
+ fts_tokenizer_unref(&gen_tok);
+ fts_tokenizers_deinit();
+ test_end();
+}
+
int main(void)
{
static void (*test_functions[])(void) = {
@@ -534,6 +576,7 @@
test_fts_tokenizer_address_char,
test_fts_tokenizer_address_line,
test_fts_tokenizer_address_rand,
+ test_fts_tokenizer_address_search,
NULL
};
More information about the dovecot-cvs
mailing list