dovecot-2.2: lib-fts: Added unit testing for unicode apostrophe handling.

dovecot at dovecot.org dovecot at dovecot.org
Mon Jun 1 19:01:30 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/ff791143d1b4
changeset: 18782:ff791143d1b4
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Jun 01 21:59:02 2015 +0300
description:
lib-fts: Added unit testing for unicode apostrophe handling.

diffstat:

 src/lib-fts/test-fts-tokenizer.c |  15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diffs (39 lines):

diff -r 78d473873e12 -r ff791143d1b4 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c	Mon Jun 01 21:58:30 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Mon Jun 01 21:59:02 2015 +0300
@@ -41,6 +41,11 @@
 	"123456789012345678901234567890x',"
 	"123456789012345678901234567890x'',"
 
+	/* \xE2\x80\x99 is the UTF-8 encoding of U+2019, a smart quote sometimes used as an apostrophe */
+	"\xE2\x80\x99 \xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99quoted text\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99 \xE2\x80\x99hlo words\xE2\x80\x99 you\xE2\x80\x99re78901234567890123456789012 bad\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99pre post\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99",
+
+	"you\xE2\x80\x99re\xE2\x80\x99xyz",
+
 	/* whitespace: with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and
 	   U+205A(e2 81 9a) and U+205F(e2 81 9f) */
 	"hello\xEF\xBC\x81world\r\nAnd\xE2\x80\x80there\twas: text "
@@ -156,6 +161,11 @@
 		"123456789012345678901234567890",
 		"123456789012345678901234567890",
 
+		"quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
+		"word", "pre", "post", NULL,
+
+		"you're'xyz", NULL,
+
 		"hello", "world", "And",
 		"there", "was", "text", "galore",
 		"and", "more", NULL,
@@ -208,6 +218,11 @@
 		"123456789012345678901234567890",
 		"123456789012345678901234567890",
 
+		"quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
+		"word", "pre", "post", NULL,
+
+		"you're'xyz", NULL,
+
 		"hello", "world", "And",
 		"there", "was", "text", "galore",
 		"and", "more", NULL,


More information about the dovecot-cvs mailing list