dovecot-2.0-sslstream: str_sanitize(): Don't break UTF-8 input.
dovecot at dovecot.org
Sat Feb 13 02:55:48 EET 2010
details: http://hg.dovecot.org/dovecot-2.0-sslstream/rev/f68c2cc1b32b
changeset: 10213:f68c2cc1b32b
user: Timo Sirainen <tss at iki.fi>
date: Wed Oct 28 13:50:55 2009 -0400
description:
str_sanitize(): Don't break UTF-8 input.
diffstat:
2 files changed, 38 insertions(+), 12 deletions(-)
src/lib/str-sanitize.c | 34 +++++++++++++++++++++++++++-------
src/lib/test-str-sanitize.c | 16 +++++++++++-----
diffs (101 lines):
diff -r 104edcb89a70 -r f68c2cc1b32b src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/str-sanitize.c Wed Oct 28 13:50:55 2009 -0400
@@ -1,32 +1,52 @@
/* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
#include "lib.h"
+#include "unichar.h"
#include "str.h"
#include "str-sanitize.h"
static size_t str_sanitize_skip_start(const char *src, size_t max_len)
{
+ unsigned int len;
+ unichar_t chr;
size_t i;
- for (i = 0; i < max_len; i++) {
- if (((unsigned char)src[i] & 0x7f) < 32)
+ for (i = 0; i < max_len; ) {
+ len = uni_utf8_char_bytes(src[i]);
+ if (uni_utf8_get_char(src+i, &chr) <= 0)
break;
+ if ((unsigned char)src[i] < 32)
+ break;
+ i += len;
}
return i;
}
void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
{
+ unsigned int len;
+ unichar_t chr;
size_t i;
+ int ret;
- i = str_sanitize_skip_start(src, max_len);
- str_append_n(dest, src, i);
-
- for (; i < max_len && src[i] != '\0'; i++) {
- if (((unsigned char)src[i] & 0x7f) < 32)
+ for (i = 0; i < max_len && src[i] != '\0'; ) {
+ len = uni_utf8_char_bytes(src[i]);
+ ret = uni_utf8_get_char(src+i, &chr);
+ if (ret <= 0) {
+ /* invalid UTF-8 */
+ str_append_c(dest, '?');
+ if (ret == 0) {
+ /* input ended too early */
+ return;
+ }
+ i++;
+ continue;
+ }
+ if ((unsigned char)src[i] < 32)
str_append_c(dest, '?');
else
str_append_c(dest, src[i]);
+ i += len;
}
if (src[i] != '\0') {
diff -r 104edcb89a70 -r f68c2cc1b32b src/lib/test-str-sanitize.c
--- a/src/lib/test-str-sanitize.c Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/test-str-sanitize.c Wed Oct 28 13:50:55 2009 -0400
@@ -17,7 +17,10 @@ void test_str_sanitize(void)
{ "ab", 2 },
{ "abc", 2 },
{ "abcd", 3 },
- { "abcde", 4 }
+ { "abcde", 4 },
+ { "Ñ", 10 },
+ { "Ñ", 1 },
+ { "\001x\x1fy\x81", 10 }
};
static const char *output[] = {
NULL,
@@ -26,15 +29,18 @@ void test_str_sanitize(void)
"ab",
"...",
"...",
- "a..."
+ "a...",
+ "Ñ",
+ "Ñ",
+ "?x?y?"
};
const char *str;
unsigned int i;
- bool success;
+ test_begin("str_sanitize");
for (i = 0; i < N_ELEMENTS(input); i++) {
str = str_sanitize(input[i].str, input[i].max_len);
- success = null_strcmp(output[i], str) == 0;
- test_out(t_strdup_printf("str_sanitize(%d)", i), success);
+ test_assert(null_strcmp(output[i], str) == 0);
}
+ test_end();
}
More information about the dovecot-cvs mailing list