dovecot-1.2: Added uni_utf8_str_is_valid().
dovecot at dovecot.org
dovecot at dovecot.org
Wed Nov 11 19:33:53 EET 2009
details: http://hg.dovecot.org/dovecot-1.2/rev/bf2fb1679cb4
changeset: 9476:bf2fb1679cb4
user: Timo Sirainen <tss at iki.fi>
date: Mon Nov 09 21:21:55 2009 -0500
description:
Added uni_utf8_str_is_valid().
diffstat:
2 files changed, 27 insertions(+), 6 deletions(-)
src/lib/unichar.c | 31 +++++++++++++++++++++++++------
src/lib/unichar.h | 2 ++
diffs (66 lines):
diff -r 76ff6831c9ae -r bf2fb1679cb4 src/lib/unichar.c
--- a/src/lib/unichar.c Tue Nov 10 19:13:36 2009 -0500
+++ b/src/lib/unichar.c Mon Nov 09 21:21:55 2009 -0500
@@ -323,8 +323,8 @@ is_valid_utf8_seq(const unsigned char *i
return len;
}
-bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
- buffer_t *buf)
+static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
+ size_t *pos_r)
{
size_t i, len;
@@ -334,13 +334,24 @@ bool uni_utf8_get_valid_data(const unsig
i++;
else {
len = is_valid_utf8_seq(input + i, size-i);
- if (unlikely(len == 0))
- goto broken;
+ if (unlikely(len == 0)) {
+ *pos_r = i;
+ return -1;
+ }
i += len;
}
}
- return TRUE;
-broken:
+ return 0;
+}
+
+bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
+ buffer_t *buf)
+{
+ size_t i, len;
+
+ if (uni_utf8_find_invalid_pos(input, size, &i) == 0)
+ return TRUE;
+
/* broken utf-8 input - skip the broken characters */
buffer_append(buf, input, i++);
@@ -362,3 +373,11 @@ broken:
}
return FALSE;
}
+
+bool uni_utf8_str_is_valid(const char *str)
+{
+ size_t i;
+
+ return uni_utf8_find_invalid_pos((const unsigned char *)str,
+ strlen(str), &i) == 0;
+}
diff -r 76ff6831c9ae -r bf2fb1679cb4 src/lib/unichar.h
--- a/src/lib/unichar.h Tue Nov 10 19:13:36 2009 -0500
+++ b/src/lib/unichar.h Mon Nov 09 21:21:55 2009 -0500
@@ -69,5 +69,7 @@ int uni_utf8_to_decomposed_titlecase(con
replacement character (0xfffd), write the output to buf and return FALSE. */
bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
buffer_t *buf);
+/* Returns TRUE if string is valid UTF-8 input. */
+bool uni_utf8_str_is_valid(const char *str);
#endif
More information about the dovecot-cvs mailing list