dovecot-2.2: lib-fts: Optimization for tr29 - we don't need to track last_size explicitly
dovecot at dovecot.org
dovecot at dovecot.org
Mon Jun 1 18:31:45 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/3ff93cabcac3
changeset: 18769:3ff93cabcac3
user: Timo Sirainen <tss at iki.fi>
date: Mon Jun 01 21:08:27 2015 +0300
description:
lib-fts: Optimization for tr29 - we don't need to track last_size explicitly
diffstat:
src/lib-fts/fts-tokenizer-generic-private.h | 1 -
src/lib-fts/fts-tokenizer-generic.c | 20 +++++++++++---------
2 files changed, 11 insertions(+), 10 deletions(-)
diffs (60 lines):
diff -r 6e459e8c3a5b -r 3ff93cabcac3 src/lib-fts/fts-tokenizer-generic-private.h
--- a/src/lib-fts/fts-tokenizer-generic-private.h Mon Jun 01 18:35:58 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic-private.h Mon Jun 01 21:08:27 2015 +0300
@@ -43,7 +43,6 @@
enum boundary_algorithm algorithm;
enum letter_type prev_letter;
enum letter_type prev_prev_letter;
- size_t last_size; /* Bytes in latest utf8 character. */
buffer_t *token;
};
diff -r 6e459e8c3a5b -r 3ff93cabcac3 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Mon Jun 01 18:35:58 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Mon Jun 01 21:08:27 2015 +0300
@@ -184,7 +184,6 @@
tok->prev_letter = LETTER_TYPE_NONE;
tok->prev_prev_letter = LETTER_TYPE_NONE;
- tok->last_size = 0;
buffer_set_used_size(tok->token, 0);
}
@@ -552,17 +551,21 @@
fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
const char **token_r)
{
- size_t end_skip = 0;
- ssize_t len;
+ const unsigned char *data = tok->token->data;
+ ssize_t len = tok->token->used;
- if (is_one_past_end(tok))
- end_skip = tok->last_size;
+ if (is_one_past_end(tok)) {
+ /* delete the last character */
+ while ((data[len-1] & 0x80) != 0)
+ len--;
+ i_assert(len > 0);
+ len--;
+ }
tok->prev_prev_letter = LETTER_TYPE_NONE;
tok->prev_letter = LETTER_TYPE_NONE;
- len = tok->token->used - end_skip;
- *token_r = len == 0 ? "" : fts_uni_strndup(tok->token->data, len);
+ *token_r = len == 0 ? "" : fts_uni_strndup(data, len);
buffer_set_used_size(tok->token, 0);
return len > 0;
}
@@ -629,8 +632,7 @@
char_start_i = i;
if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
i_unreached();
- tok->last_size = uni_utf8_char_bytes(data[i]);
- i += tok->last_size - 1; /* Utf8 bytes > 1, for() handles the 1 byte increment. */
+ i += uni_utf8_char_bytes(data[i]) - 1; /* Utf8 bytes > 1, for() handles the 1 byte increment. */
lt = letter_type(c);
if (tok->prev_letter == LETTER_TYPE_NONE && is_nonword(lt)) {
/* TODO: test that start_skip works with multibyte utf8 chars */
More information about the dovecot-cvs mailing list