dovecot-2.0-sslstream: str_sanitize(): Don't break UTF-8 input.

dovecot at dovecot.org dovecot at dovecot.org
Sat Feb 13 02:55:48 EET 2010


details:   http://hg.dovecot.org/dovecot-2.0-sslstream/rev/f68c2cc1b32b
changeset: 10213:f68c2cc1b32b
user:      Timo Sirainen <tss at iki.fi>
date:      Wed Oct 28 13:50:55 2009 -0400
description:
str_sanitize(): Don't break UTF-8 input.

diffstat:

2 files changed, 38 insertions(+), 12 deletions(-)
src/lib/str-sanitize.c      |   34 +++++++++++++++++++++++++++-------
src/lib/test-str-sanitize.c |   16 +++++++++++-----

diffs (101 lines):

diff -r 104edcb89a70 -r f68c2cc1b32b src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c	Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/str-sanitize.c	Wed Oct 28 13:50:55 2009 -0400
@@ -1,32 +1,52 @@
 /* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "unichar.h"
 #include "str.h"
 #include "str-sanitize.h"
 
 static size_t str_sanitize_skip_start(const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
 
-	for (i = 0; i < max_len; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		if (uni_utf8_get_char(src+i, &chr) <= 0)
 			break;
+		if ((unsigned char)src[i] < 32)
+			break;
+		i += len;
 	}
 	return i;
 }
 
 void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
+	int ret;
 
-	i = str_sanitize_skip_start(src, max_len);
-	str_append_n(dest, src, i);
-
-	for (; i < max_len && src[i] != '\0'; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len && src[i] != '\0'; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		ret = uni_utf8_get_char(src+i, &chr);
+		if (ret <= 0) {
+			/* invalid UTF-8 */
+			str_append_c(dest, '?');
+			if (ret == 0) {
+				/* input ended too early */
+				return;
+			}
+			i++;
+			continue;
+		}
+		if ((unsigned char)src[i] < 32)
 			str_append_c(dest, '?');
 		else
 			str_append_c(dest, src[i]);
+		i += len;
 	}
 
 	if (src[i] != '\0') {
diff -r 104edcb89a70 -r f68c2cc1b32b src/lib/test-str-sanitize.c
--- a/src/lib/test-str-sanitize.c	Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/test-str-sanitize.c	Wed Oct 28 13:50:55 2009 -0400
@@ -17,7 +17,10 @@ void test_str_sanitize(void)
 		{ "ab", 2 },
 		{ "abc", 2 },
 		{ "abcd", 3 },
-		{ "abcde", 4 }
+		{ "abcde", 4 },
+		{ "с", 10 },
+		{ "с", 1 },
+		{ "\001x\x1fy\x81", 10 }
 	};
 	static const char *output[] = {
 		NULL,
@@ -26,15 +29,18 @@ void test_str_sanitize(void)
 		"ab",
 		"...",
 		"...",
-		"a..."
+		"a...",
+		"с",
+		"с",
+		"?x?y?"
 	};
 	const char *str;
 	unsigned int i;
-	bool success;
 
+	test_begin("str_sanitize");
 	for (i = 0; i < N_ELEMENTS(input); i++) {
 		str = str_sanitize(input[i].str, input[i].max_len);
-		success = null_strcmp(output[i], str) == 0;
-		test_out(t_strdup_printf("str_sanitize(%d)", i), success);
+		test_assert(null_strcmp(output[i], str) == 0);
 	}
+	test_end();
 }


More information about the dovecot-cvs mailing list