dovecot: Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8 comparison.

dovecot at dovecot.org dovecot at dovecot.org
Fri Jul 20 17:39:16 EEST 2007


details:   http://hg.dovecot.org/dovecot/rev/5f56b2eb32b3
changeset: 6131:5f56b2eb32b3
user:      Timo Sirainen <tss at iki.fi>
date:      Fri Jul 20 17:27:02 2007 +0300
description:
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
comparison.

diffstat:

5 files changed, 43 insertions(+), 62 deletions(-)
src/lib-charset/charset-iconv.c      |   53 +++++++++++++++++++---------------
src/lib-charset/charset-utf8.c       |   33 +++------------------
src/lib-charset/charset-utf8.h       |    5 ---
src/lib-mail/message-decoder.c       |    9 +++--
src/lib-mail/message-header-decode.c |    5 +--

diffs (210 lines):

diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-iconv.c	Fri Jul 20 17:27:02 2007 +0300
@@ -1,7 +1,8 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2007 Timo Sirainen */
 
 #include "lib.h"
 #include "buffer.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 
 #ifdef HAVE_ICONV
@@ -63,32 +64,37 @@ charset_to_utf8_try(struct charset_trans
 		    enum charset_result *result)
 {
 	ICONV_CONST char *ic_srcbuf;
-	char *ic_destbuf;
-	size_t srcleft, destpos, destleft, size;
+	char tmpbuf[8192], *ic_destbuf;
+	size_t srcleft, destleft;
 	bool ret = TRUE;
 
-	destpos = dest->used;
 	if (t->cd == (iconv_t)-1) {
 		/* no translation needed - just copy it to outbuf uppercased */
-		if (t->ucase)
-			charset_utf8_ucase_write(dest, destpos, src, *src_size);
-		else
+		*result = CHARSET_RET_OK;
+		if (!t->ucase) {
 			buffer_append(dest, src, *src_size);
+			return TRUE;
+		}
 
-		*result = CHARSET_RET_OK;
+		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+			*result = CHARSET_RET_INVALID_INPUT;
 		return TRUE;
 	}
-	destleft = buffer_get_size(dest) - destpos;
-	if (destleft < *src_size) {
-		/* The buffer is most likely too small to hold the output,
-		   so increase it at least to the input size. */
-		destleft = *src_size;
+	if (!t->ucase) {
+		destleft = buffer_get_size(dest) - dest->used;
+		if (destleft < *src_size) {
+			/* The buffer is most likely too small to hold the
+			   output, so increase it at least to the input size. */
+			destleft = *src_size;
+		}
+		ic_destbuf = buffer_append_space_unsafe(dest, destleft);
+	} else {
+		destleft = sizeof(tmpbuf);
+		ic_destbuf = tmpbuf;
 	}
 
-	size = destleft;
 	srcleft = *src_size;
 	ic_srcbuf = (ICONV_CONST char *) src;
-	ic_destbuf = buffer_append_space_unsafe(dest, destleft);
 
 	if (iconv(t->cd, &ic_srcbuf, &srcleft,
 		  &ic_destbuf, &destleft) != (size_t)-1)
@@ -104,16 +110,17 @@ charset_to_utf8_try(struct charset_trans
 		*result = CHARSET_RET_INVALID_INPUT;
 		return TRUE;
 	}
-	size -= destleft;
+	*src_size -= srcleft;
 
-	/* give back the memory we didn't use */
-	buffer_set_used_size(dest, dest->used - destleft);
+	if (!t->ucase) {
+		/* give back the memory we didn't use */
+		buffer_set_used_size(dest, dest->used - destleft);
+	} else {
+		size_t tmpsize = sizeof(tmpbuf) - destleft;
 
-	*src_size -= srcleft;
-	if (t->ucase) {
-		charset_utf8_ucase_write(dest, destpos,
-					 (unsigned char *)ic_destbuf - size,
-					 size);
+		/* we just converted data to UTF-8, it can't be invalid */
+		if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0)
+			i_unreached();
 	}
 	return ret;
 }
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.c	Fri Jul 20 17:27:02 2007 +0300
@@ -13,31 +13,6 @@ bool charset_is_utf8(const char *charset
 		strcasecmp(charset, "UTF-8") == 0 ||
 		strcasecmp(charset, "UTF8") == 0;
 }
-
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
-			      const unsigned char *src, size_t src_size)
-{
-	char *destbuf;
-	size_t i;
-
-	destbuf = buffer_get_space_unsafe(dest, destpos, src_size);
-	for (i = 0; i < src_size; i++)
-		destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */
-}
-
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
-				      size_t *utf8_size_r)
-{
-	buffer_t *dest;
-
-	dest = buffer_create_dynamic(pool_datastack_create(), size);
-	charset_utf8_ucase_write(dest, 0, data, size);
-	if (utf8_size_r != NULL)
-		*utf8_size_r = buffer_get_used_size(dest);
-	buffer_append_c(dest, '\0');
-	return buffer_free_without_data(dest);
-}
-
 
 #ifndef HAVE_ICONV
 
@@ -82,10 +57,12 @@ charset_to_utf8(struct charset_translati
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if (t == &utf8_translation_uc || t == &ascii_translation_uc)
-		charset_utf8_ucase_write(dest, dest->used, src, *src_size);
-	else
+	if (t != &utf8_translation_uc && t != &ascii_translation_uc) {
 		buffer_append(dest, src, *src_size);
+		return CHARSET_RET_OK;
+	}
+	if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+		return CHARSET_RET_INVALID_INPUT;
 	return CHARSET_RET_OK;
 }
 
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.h	Fri Jul 20 17:27:02 2007 +0300
@@ -24,9 +24,4 @@ charset_to_utf8(struct charset_translati
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest);
 
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
-			      const unsigned char *src, size_t src_size);
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
-				      size_t *utf8_size_r);
-
 #endif
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-mail/message-decoder.c
--- a/src/lib-mail/message-decoder.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-decoder.c	Fri Jul 20 17:27:02 2007 +0300
@@ -4,6 +4,7 @@
 #include "buffer.h"
 #include "base64.h"
 #include "str.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 #include "quoted-printable.h"
 #include "rfc822-parser.h"
@@ -159,9 +160,8 @@ static bool message_decode_header(struct
 	value_len = ctx->buf->used;
 
 	if (ctx->ucase) {
-		charset_utf8_ucase_write(ctx->buf, ctx->buf->used,
-					 (const unsigned char *)hdr->name,
-					 hdr->name_len);
+		(void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
+						       ctx->buf);
 		buffer_append_c(ctx->buf, '\0');
 	}
 
@@ -300,7 +300,8 @@ static bool message_decode_body(struct m
 	if (ctx->charset_utf8) {
 		if (ctx->ucase) {
 			buffer_set_used_size(ctx->buf2, 0);
-			charset_utf8_ucase_write(ctx->buf2, 0, data, size);
+			(void)uni_utf8_to_decomposed_titlecase(data, size,
+							       ctx->buf);
 			output->data = ctx->buf2->data;
 			output->size = ctx->buf2->used;
 		} else {
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-mail/message-header-decode.c
--- a/src/lib-mail/message-header-decode.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-header-decode.c	Fri Jul 20 17:27:02 2007 +0300
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "base64.h"
 #include "buffer.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 #include "quoted-printable.h"
 #include "message-header-decode.h"
@@ -139,8 +140,8 @@ decode_utf8_callback(const unsigned char
 	if (charset == NULL || charset_is_utf8(charset)) {
 		/* ASCII / UTF-8 */
 		if (ctx->ucase) {
-			charset_utf8_ucase_write(ctx->dest, ctx->dest->used,
-						 data, size);
+			(void)uni_utf8_to_decomposed_titlecase(data, size,
+							       ctx->dest);
 		} else {
 			buffer_append(ctx->dest, data, size);
 		}


More information about the dovecot-cvs mailing list