dovecot-2.2: lib-fts: simple tokenizer optimization - don't check unicode word breaks for ASCII chars.

dovecot at dovecot.org dovecot at dovecot.org
Mon Jun 1 18:32:06 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/7e3f4fa82dfe
changeset: 18775:7e3f4fa82dfe
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Jun 01 21:28:42 2015 +0300
description:
lib-fts: simple tokenizer optimization - don't check unicode word breaks for ASCII chars.

diffstat:

 src/lib-fts/fts-tokenizer-generic.c |  15 +++------------
 1 files changed, 3 insertions(+), 12 deletions(-)

diffs (37 lines):

diff -r 33547eaa0cac -r 7e3f4fa82dfe src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 21:27:09 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 21:28:42 2015 +0300
@@ -129,21 +129,10 @@
 	BINARY_NUMBER_SEARCH(data, count, value, idx_r);
 }
 
-static bool fts_ascii_word_break(unsigned char c)
-{
-	if (c < 0x80)
-		return fts_ascii_word_breaks[c] != 0;
-	return FALSE;
-}
-
 static bool fts_uni_word_break(unichar_t c)
 {
 	unsigned int idx;
 
-	/* Override some apostrophes, which get special treatment. */
-	if (IS_APOSTROPHE(c))
-		return FALSE;
-
 	/* Unicode General Punctuation, including deprecated characters. */
 	if (c >= 0x2000 && c <= 0x206f)
 		return TRUE;
@@ -169,8 +158,10 @@
 {
 	if (apostrophe)
 		return tok->prev_letter == LETTER_TYPE_SINGLE_QUOTE;
+	else if (c < 0x80)
+		return fts_ascii_word_breaks[c] != 0;
 	else
-		return fts_ascii_word_break(c) || fts_uni_word_break(c);
+		return fts_uni_word_break(c);
 }
 
 static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)


More information about the dovecot-cvs mailing list