dovecot-2.2: various: use new uni_utf8_get_char*() interface
dovecot at dovecot.org
dovecot at dovecot.org
Mon Jun 1 19:19:13 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/778b57788590
changeset: 18789:778b57788590
user: Phil Carmody <phil at dovecot.fi>
date: Mon Jun 01 22:16:19 2015 +0300
description:
various: use new uni_utf8_get_char*() interface
There is no need for a separate uni_utf8_char_bytes() call when a single
uni_utf8_get_char*() call both parses the character and returns its size.
Signed-off-by: Phil Carmody <phil at dovecot.fi>
diffstat:
src/lib/str-sanitize.c | 26 ++++++++++----------------
src/lib/unichar.c | 26 ++++++++++----------------
src/plugins/fts-solr/fts-backend-solr-old.c | 6 ++----
src/plugins/fts-solr/fts-backend-solr.c | 6 ++----
4 files changed, 24 insertions(+), 40 deletions(-)
diffs (155 lines):
diff -r 3a5ea8cf2233 -r 778b57788590 src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c Mon Jun 01 22:14:19 2015 +0300
+++ b/src/lib/str-sanitize.c Mon Jun 01 22:16:19 2015 +0300
@@ -7,13 +7,12 @@
static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
{
- unsigned int len;
unichar_t chr;
size_t i;
- for (i = 0; src[i] != '\0'; ) {
- len = uni_utf8_char_bytes(src[i]);
- if (i + len > max_bytes || uni_utf8_get_char(src+i, &chr) <= 0)
+ for (i = 0; i < max_bytes && src[i] != '\0'; ) {
+ int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
+ if (len <= 0)
break;
if ((unsigned char)src[i] < 32)
break;
@@ -45,23 +44,18 @@
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
- unsigned int len, initial_pos = str_len(dest);
+ unsigned int initial_pos = str_len(dest);
unichar_t chr;
size_t i;
- int ret;
- for (i = 0; src[i] != '\0'; ) {
- len = uni_utf8_char_bytes(src[i]);
- if (i + len > max_bytes)
- break;
- ret = uni_utf8_get_char(src+i, &chr);
- if (ret <= 0) {
+ for (i = 0; i < max_bytes && src[i] != '\0'; ) {
+ int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
+ if (len == 0)
+ break; /* input ended too early */
+
+ if (len < 0) {
/* invalid UTF-8 */
str_append_c(dest, '?');
- if (ret == 0) {
- /* input ended too early */
- return;
- }
i++;
continue;
}
diff -r 3a5ea8cf2233 -r 778b57788590 src/lib/unichar.c
--- a/src/lib/unichar.c Mon Jun 01 22:14:19 2015 +0300
+++ b/src/lib/unichar.c Mon Jun 01 22:16:19 2015 +0300
@@ -109,11 +109,12 @@
unichar_t chr;
while (*input != '\0') {
- if (uni_utf8_get_char(input, &chr) <= 0) {
+ int len = uni_utf8_get_char(input, &chr);
+ if (len <= 0) {
/* invalid input */
return -1;
}
- input += uni_utf8_char_bytes(*input);
+ input += len;
array_append(output, &chr, 1);
}
@@ -124,15 +125,11 @@
ARRAY_TYPE(unichars) *output)
{
unichar_t chr;
- unsigned int len;
while (size > 0) {
- if (uni_utf8_get_char_n(input, size, &chr) <= 0) {
- /* invalid input */
- return -1;
- }
- len = uni_utf8_char_bytes(*input);
- i_assert(len <= size);
+ int len = uni_utf8_get_char_n(input, size, &chr);
+ if (len <= 0)
+ return -1; /* invalid input */
input += len; size -= len;
array_append(output, &chr, 1);
@@ -329,19 +326,18 @@
buffer_t *output)
{
const unsigned char *input = _input;
- unsigned int bytes;
unichar_t chr;
int ret = 0;
while (size > 0) {
- if (uni_utf8_get_char_n(input, size, &chr) <= 0) {
+ int bytes = uni_utf8_get_char_n(input, size, &chr);
+ if (bytes <= 0) {
/* invalid input. try the next byte. */
ret = -1;
input++; size--;
output_add_replacement_char(output);
continue;
}
- bytes = uni_utf8_char_bytes(*input);
input += bytes;
size -= bytes;
@@ -359,10 +355,8 @@
is_valid_utf8_seq(const unsigned char *input, unsigned int size)
{
unichar_t chr;
-
- if (uni_utf8_get_char_n(input, size, &chr) <= 0)
- return 0;
- return uni_utf8_char_bytes(input[0]);
+ int len = uni_utf8_get_char_n(input, size, &chr);
+ return len <= 0 ? 0 : len;
}
static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
diff -r 3a5ea8cf2233 -r 778b57788590 src/plugins/fts-solr/fts-backend-solr-old.c
--- a/src/plugins/fts-solr/fts-backend-solr-old.c Mon Jun 01 22:14:19 2015 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr-old.c Mon Jun 01 22:16:19 2015 +0300
@@ -89,10 +89,8 @@
/* make sure the character is valid for XML
so we don't get XML parser errors */
unsigned int char_len =
- uni_utf8_char_bytes(data[i]);
- if (i + char_len <= len &&
- uni_utf8_get_char_n(data + i, char_len, &chr) > 0 &&
- is_valid_xml_char(chr))
+ uni_utf8_get_char_n(data + i, len - i, &chr);
+ if (char_len > 0 && is_valid_xml_char(chr))
str_append_n(dest, data + i, char_len);
else {
str_append_n(dest, utf8_replacement_char,
diff -r 3a5ea8cf2233 -r 778b57788590 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c Mon Jun 01 22:14:19 2015 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c Mon Jun 01 22:16:19 2015 +0300
@@ -110,10 +110,8 @@
/* make sure the character is valid for XML
so we don't get XML parser errors */
unsigned int char_len =
- uni_utf8_char_bytes(data[i]);
- if (i + char_len <= len &&
- uni_utf8_get_char_n(data + i, char_len, &chr) > 0 &&
- is_valid_xml_char(chr))
+ uni_utf8_get_char_n(data + i, len - i, &chr);
+ if (char_len > 0 && is_valid_xml_char(chr))
str_append_n(dest, data + i, char_len);
else {
str_append_n(dest, utf8_replacement_char,
More information about the dovecot-cvs mailing list