dovecot: Treat replacement characters as non-indexed chars.

dovecot at dovecot.org dovecot at dovecot.org
Tue Jan 22 09:35:10 EET 2008


details:   http://hg.dovecot.org/dovecot/rev/d9b87e3ce6c8
changeset: 7187:d9b87e3ce6c8
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Jan 22 09:33:40 2008 +0200
description:
Treat UTF-8 replacement characters (U+FFFD, bytes EF BF BD) as non-indexed chars.

diffstat:

1 file changed, 13 insertions(+), 2 deletions(-)
src/plugins/fts-squat/squat-trie.c |   15 +++++++++++++--

diffs (29 lines):

diff -r d48c419a27ca -r d9b87e3ce6c8 src/plugins/fts-squat/squat-trie.c
--- a/src/plugins/fts-squat/squat-trie.c	Tue Jan 22 09:32:27 2008 +0200
+++ b/src/plugins/fts-squat/squat-trie.c	Tue Jan 22 09:33:40 2008 +0200
@@ -821,12 +821,23 @@ squat_data_normalize(struct squat_trie *
 squat_data_normalize(struct squat_trie *trie, const unsigned char *data,
 		     unsigned int size)
 {
+	static const unsigned char replacement_utf8[] = { 0xef, 0xbf, 0xbd };
 	unsigned char *dest;
 	unsigned int i;
 
 	dest = t_malloc(size);
-	for (i = 0; i < size; i++)
-		dest[i] = trie->hdr.normalize_map[data[i]];
+	for (i = 0; i < size; i++) {
+		if (data[i] == replacement_utf8[0] && i + 2 < size &&
+		    data[i+1] == replacement_utf8[1] &&
+		    data[i+2] == replacement_utf8[2]) {
+			/* Don't index replacement character */
+			dest[i++] = 0;
+			dest[i++] = 0;
+			dest[i] = 0;
+		} else {
+			dest[i] = trie->hdr.normalize_map[data[i]];
+		}
+	}
 	return dest;
 }
 


More information about the dovecot-cvs mailing list