dovecot-2.2: lib-fts: Fixed tr29 tokenizer to delete last character correctly when it's preceded by non-ASCII
dovecot at dovecot.org
dovecot at dovecot.org
Tue Jun 2 17:52:47 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/97b5c5e88540
changeset: 18796:97b5c5e88540
user: Timo Sirainen <tss at iki.fi>
date: Tue Jun 02 20:50:23 2015 +0300
description:
lib-fts: Fixed tr29 tokenizer to delete last character correctly when it's preceded by non-ASCII
diffstat:
src/lib-fts/fts-tokenizer-generic.c | 3 ++-
src/lib-fts/test-fts-tokenizer.c | 6 +++---
2 files changed, 5 insertions(+), 4 deletions(-)
diffs (43 lines):
diff -r 0bcd3e9e77d4 -r 97b5c5e88540 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c Tue Jun 02 19:59:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c Tue Jun 02 20:50:23 2015 +0300
@@ -546,7 +546,8 @@
if (is_one_past_end(tok) &&
tok->untruncated_length <= tok->max_length) {
/* delete the last character */
- while ((data[len-1] & 0x80) != 0)
+ while ((data[len-1] & 0x80) != 0 &&
+ ((data[len-1] & (0x80|0x40)) != (0x80|0x40)))
len--;
i_assert(len > 0);
len--;
diff -r 0bcd3e9e77d4 -r 97b5c5e88540 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c Tue Jun 02 19:59:45 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c Tue Jun 02 20:50:23 2015 +0300
@@ -17,7 +17,7 @@
static const char *test_inputs[] = {
/* generic things and word truncation: */
- "hello world\r\n\nAnd there\twas: text galore, "
+ "hello world\r\n\nAnd there\twas: text galor\xC3\xA9\xE2\x80\xA7 "
"abc@example.com, "
"Bar Baz <bar@example.org>, "
"foo@domain "
@@ -137,7 +137,7 @@
{
static const char *const expected_output[] = {
"hello", "world", "And",
- "there", "was", "text", "galore",
+ "there", "was", "text", "galor\xC3\xA9",
"abc", "example", "com", "Bar", "Baz",
"bar", "example", "org", "foo", "domain",
"1234567890123456789012345678ä",
@@ -194,7 +194,7 @@
{
static const char *const expected_output[] = {
"hello", "world", "And",
- "there", "was", "text", "galore",
+ "there", "was", "text", "galor\xC3\xA9",
"abc", "example", "com", "Bar", "Baz",
"bar", "example", "org", "foo", "domain",
"1234567890123456789012345678ä",
More information about the dovecot-cvs mailing list