dovecot-2.2: various: use new uni_utf8_get_char*() interface

dovecot at dovecot.org dovecot at dovecot.org
Mon Jun 1 19:19:13 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/778b57788590
changeset: 18789:778b57788590
user:      Phil Carmody <phil at dovecot.fi>
date:      Mon Jun 01 22:16:19 2015 +0300
description:
various: use new uni_utf8_get_char*() interface
There is no need for a separate uni_utf8_char_bytes() call when a single
uni_utf8_get_char*() call both parses the character and returns its length in bytes.

Signed-off-by: Phil Carmody <phil at dovecot.fi>

diffstat:

 src/lib/str-sanitize.c                      |  26 ++++++++++----------------
 src/lib/unichar.c                           |  26 ++++++++++----------------
 src/plugins/fts-solr/fts-backend-solr-old.c |   6 ++----
 src/plugins/fts-solr/fts-backend-solr.c     |   6 ++----
 4 files changed, 24 insertions(+), 40 deletions(-)

diffs (155 lines):

diff -r 3a5ea8cf2233 -r 778b57788590 src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c	Mon Jun 01 22:14:19 2015 +0300
+++ b/src/lib/str-sanitize.c	Mon Jun 01 22:16:19 2015 +0300
@@ -7,13 +7,12 @@
 
 static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
 {
-	unsigned int len;
 	unichar_t chr;
 	size_t i;
 
-	for (i = 0; src[i] != '\0'; ) {
-		len = uni_utf8_char_bytes(src[i]);
-		if (i + len > max_bytes || uni_utf8_get_char(src+i, &chr) <= 0)
+	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
+		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
+		if (len <= 0)
 			break;
 		if ((unsigned char)src[i] < 32)
 			break;
@@ -45,23 +44,18 @@
 
 void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
 {
-	unsigned int len, initial_pos = str_len(dest);
+	unsigned int initial_pos = str_len(dest);
 	unichar_t chr;
 	size_t i;
-	int ret;
 
-	for (i = 0; src[i] != '\0'; ) {
-		len = uni_utf8_char_bytes(src[i]);
-		if (i + len > max_bytes)
-			break;
-		ret = uni_utf8_get_char(src+i, &chr);
-		if (ret <= 0) {
+	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
+		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
+		if (len == 0)
+			break; /* input ended too early */
+
+		if (len < 0) {
 			/* invalid UTF-8 */
 			str_append_c(dest, '?');
-			if (ret == 0) {
-				/* input ended too early */
-				return;
-			}
 			i++;
 			continue;
 		}
diff -r 3a5ea8cf2233 -r 778b57788590 src/lib/unichar.c
--- a/src/lib/unichar.c	Mon Jun 01 22:14:19 2015 +0300
+++ b/src/lib/unichar.c	Mon Jun 01 22:16:19 2015 +0300
@@ -109,11 +109,12 @@
 	unichar_t chr;
 
 	while (*input != '\0') {
-		if (uni_utf8_get_char(input, &chr) <= 0) {
+		int len = uni_utf8_get_char(input, &chr);
+		if (len <= 0) {
 			/* invalid input */
 			return -1;
 		}
-                input += uni_utf8_char_bytes(*input);
+                input += len;
 
 		array_append(output, &chr, 1);
 	}
@@ -124,15 +125,11 @@
 		       ARRAY_TYPE(unichars) *output)
 {
 	unichar_t chr;
-	unsigned int len;
 
 	while (size > 0) {
-		if (uni_utf8_get_char_n(input, size, &chr) <= 0) {
-			/* invalid input */
-			return -1;
-		}
-		len = uni_utf8_char_bytes(*input);
-		i_assert(len <= size);
+		int len = uni_utf8_get_char_n(input, size, &chr);
+		if (len <= 0)
+			return -1; /* invalid input */
 		input += len; size -= len;
 
 		array_append(output, &chr, 1);
@@ -329,19 +326,18 @@
 				     buffer_t *output)
 {
 	const unsigned char *input = _input;
-	unsigned int bytes;
 	unichar_t chr;
 	int ret = 0;
 
 	while (size > 0) {
-		if (uni_utf8_get_char_n(input, size, &chr) <= 0) {
+		int bytes = uni_utf8_get_char_n(input, size, &chr);
+		if (bytes <= 0) {
 			/* invalid input. try the next byte. */
 			ret = -1;
 			input++; size--;
 			output_add_replacement_char(output);
 			continue;
 		}
-		bytes = uni_utf8_char_bytes(*input);
 		input += bytes;
 		size -= bytes;
 
@@ -359,10 +355,8 @@
 is_valid_utf8_seq(const unsigned char *input, unsigned int size)
 {
 	unichar_t chr;
-
-	if (uni_utf8_get_char_n(input, size, &chr) <= 0)
-		return 0;
-	return uni_utf8_char_bytes(input[0]);
+	int len = uni_utf8_get_char_n(input, size, &chr);
+	return len <= 0 ? 0 : len;
 }
 
 static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
diff -r 3a5ea8cf2233 -r 778b57788590 src/plugins/fts-solr/fts-backend-solr-old.c
--- a/src/plugins/fts-solr/fts-backend-solr-old.c	Mon Jun 01 22:14:19 2015 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr-old.c	Mon Jun 01 22:16:19 2015 +0300
@@ -89,10 +89,8 @@
 				/* make sure the character is valid for XML
 				   so we don't get XML parser errors */
 				unsigned int char_len =
-					uni_utf8_char_bytes(data[i]);
-				if (i + char_len <= len &&
-				    uni_utf8_get_char_n(data + i, char_len, &chr) > 0 &&
-				    is_valid_xml_char(chr))
+					uni_utf8_get_char_n(data + i, len - i, &chr);
+				if (char_len > 0 && is_valid_xml_char(chr))
 					str_append_n(dest, data + i, char_len);
 				else {
 					str_append_n(dest, utf8_replacement_char,
diff -r 3a5ea8cf2233 -r 778b57788590 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Mon Jun 01 22:14:19 2015 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Mon Jun 01 22:16:19 2015 +0300
@@ -110,10 +110,8 @@
 				/* make sure the character is valid for XML
 				   so we don't get XML parser errors */
 				unsigned int char_len =
-					uni_utf8_char_bytes(data[i]);
-				if (i + char_len <= len &&
-				    uni_utf8_get_char_n(data + i, char_len, &chr) > 0 &&
-				    is_valid_xml_char(chr))
+					uni_utf8_get_char_n(data + i, len - i, &chr);
+				if (char_len > 0 && is_valid_xml_char(chr))
 					str_append_n(dest, data + i, char_len);
 				else {
 					str_append_n(dest, utf8_replacement_char,


More information about the dovecot-cvs mailing list