dovecot-2.2: lib-fts: simple tokenizer optimization - don't check unicode word breaks for ASCII chars.
dovecot at dovecot.org
dovecot at dovecot.org
Mon Jun 1 18:32:06 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/7e3f4fa82dfe
changeset: 18775:7e3f4fa82dfe
user: Timo Sirainen <tss at iki.fi>
date: Mon Jun 01 21:28:42 2015 +0300
description:
lib-fts: simple tokenizer optimization - don't check unicode word breaks for ASCII chars.
diffstat:
src/lib-fts/fts-tokenizer-generic.c | 15 +++------------
1 files changed, 3 insertions(+), 12 deletions(-)
diffs (37 lines):
diff -r 33547eaa0cac -r 7e3f4fa82dfe src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Mon Jun 01 21:27:09 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Mon Jun 01 21:28:42 2015 +0300
@@ -129,21 +129,10 @@
BINARY_NUMBER_SEARCH(data, count, value, idx_r);
}
-static bool fts_ascii_word_break(unsigned char c)
-{
- if (c < 0x80)
- return fts_ascii_word_breaks[c] != 0;
- return FALSE;
-}
-
static bool fts_uni_word_break(unichar_t c)
{
unsigned int idx;
- /* Override some apostrophes, which get special treatment. */
- if (IS_APOSTROPHE(c))
- return FALSE;
-
/* Unicode General Punctuation, including deprecated characters. */
if (c >= 0x2000 && c <= 0x206f)
return TRUE;
@@ -169,8 +158,10 @@
{
if (apostrophe)
return tok->prev_letter == LETTER_TYPE_SINGLE_QUOTE;
+ else if (c < 0x80)
+ return fts_ascii_word_breaks[c] != 0;
else
- return fts_ascii_word_break(c) || fts_uni_word_break(c);
+ return fts_uni_word_break(c);
}
static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)
More information about the dovecot-cvs mailing list