dovecot: Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8 comparing.
dovecot at dovecot.org
dovecot at dovecot.org
Fri Jul 20 17:39:16 EEST 2007
details: http://hg.dovecot.org/dovecot/rev/5f56b2eb32b3
changeset: 6131:5f56b2eb32b3
user: Timo Sirainen <tss at iki.fi>
date: Fri Jul 20 17:27:02 2007 +0300
description:
Use uni_utf8_to_decomposed_titlecase() to get proper case-insensitive UTF-8
comparison.
diffstat:
5 files changed, 43 insertions(+), 62 deletions(-)
src/lib-charset/charset-iconv.c | 53 +++++++++++++++++++---------------
src/lib-charset/charset-utf8.c | 33 +++------------------
src/lib-charset/charset-utf8.h | 5 ---
src/lib-mail/message-decoder.c | 9 +++--
src/lib-mail/message-header-decode.c | 5 +--
diffs (210 lines):
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-iconv.c Fri Jul 20 17:27:02 2007 +0300
@@ -1,7 +1,8 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2007 Timo Sirainen */
#include "lib.h"
#include "buffer.h"
+#include "unichar.h"
#include "charset-utf8.h"
#ifdef HAVE_ICONV
@@ -63,32 +64,37 @@ charset_to_utf8_try(struct charset_trans
enum charset_result *result)
{
ICONV_CONST char *ic_srcbuf;
- char *ic_destbuf;
- size_t srcleft, destpos, destleft, size;
+ char tmpbuf[8192], *ic_destbuf;
+ size_t srcleft, destleft;
bool ret = TRUE;
- destpos = dest->used;
if (t->cd == (iconv_t)-1) {
/* no translation needed - just copy it to outbuf uppercased */
- if (t->ucase)
- charset_utf8_ucase_write(dest, destpos, src, *src_size);
- else
+ *result = CHARSET_RET_OK;
+ if (!t->ucase) {
buffer_append(dest, src, *src_size);
+ return TRUE;
+ }
- *result = CHARSET_RET_OK;
+ if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+ *result = CHARSET_RET_INVALID_INPUT;
return TRUE;
}
- destleft = buffer_get_size(dest) - destpos;
- if (destleft < *src_size) {
- /* The buffer is most likely too small to hold the output,
- so increase it at least to the input size. */
- destleft = *src_size;
+ if (!t->ucase) {
+ destleft = buffer_get_size(dest) - dest->used;
+ if (destleft < *src_size) {
+ /* The buffer is most likely too small to hold the
+ output, so increase it at least to the input size. */
+ destleft = *src_size;
+ }
+ ic_destbuf = buffer_append_space_unsafe(dest, destleft);
+ } else {
+ destleft = sizeof(tmpbuf);
+ ic_destbuf = tmpbuf;
}
- size = destleft;
srcleft = *src_size;
ic_srcbuf = (ICONV_CONST char *) src;
- ic_destbuf = buffer_append_space_unsafe(dest, destleft);
if (iconv(t->cd, &ic_srcbuf, &srcleft,
&ic_destbuf, &destleft) != (size_t)-1)
@@ -104,16 +110,17 @@ charset_to_utf8_try(struct charset_trans
*result = CHARSET_RET_INVALID_INPUT;
return TRUE;
}
- size -= destleft;
+ *src_size -= srcleft;
- /* give back the memory we didn't use */
- buffer_set_used_size(dest, dest->used - destleft);
+ if (!t->ucase) {
+ /* give back the memory we didn't use */
+ buffer_set_used_size(dest, dest->used - destleft);
+ } else {
+ size_t tmpsize = sizeof(tmpbuf) - destleft;
- *src_size -= srcleft;
- if (t->ucase) {
- charset_utf8_ucase_write(dest, destpos,
- (unsigned char *)ic_destbuf - size,
- size);
+ /* we just converted data to UTF-8, it can't be invalid */
+ if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0)
+ i_unreached();
}
return ret;
}
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.c Fri Jul 20 17:27:02 2007 +0300
@@ -13,31 +13,6 @@ bool charset_is_utf8(const char *charset
strcasecmp(charset, "UTF-8") == 0 ||
strcasecmp(charset, "UTF8") == 0;
}
-
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
- const unsigned char *src, size_t src_size)
-{
- char *destbuf;
- size_t i;
-
- destbuf = buffer_get_space_unsafe(dest, destpos, src_size);
- for (i = 0; i < src_size; i++)
- destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */
-}
-
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
- size_t *utf8_size_r)
-{
- buffer_t *dest;
-
- dest = buffer_create_dynamic(pool_datastack_create(), size);
- charset_utf8_ucase_write(dest, 0, data, size);
- if (utf8_size_r != NULL)
- *utf8_size_r = buffer_get_used_size(dest);
- buffer_append_c(dest, '\0');
- return buffer_free_without_data(dest);
-}
-
#ifndef HAVE_ICONV
@@ -82,10 +57,12 @@ charset_to_utf8(struct charset_translati
charset_to_utf8(struct charset_translation *t,
const unsigned char *src, size_t *src_size, buffer_t *dest)
{
- if (t == &utf8_translation_uc || t == &ascii_translation_uc)
- charset_utf8_ucase_write(dest, dest->used, src, *src_size);
- else
+ if (t != &utf8_translation_uc && t != &ascii_translation_uc) {
buffer_append(dest, src, *src_size);
+ return CHARSET_RET_OK;
+ }
+ if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+ return CHARSET_RET_INVALID_INPUT;
return CHARSET_RET_OK;
}
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.h Fri Jul 20 17:27:02 2007 +0300
@@ -24,9 +24,4 @@ charset_to_utf8(struct charset_translati
charset_to_utf8(struct charset_translation *t,
const unsigned char *src, size_t *src_size, buffer_t *dest);
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
- const unsigned char *src, size_t src_size);
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
- size_t *utf8_size_r);
-
#endif
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-mail/message-decoder.c
--- a/src/lib-mail/message-decoder.c Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-decoder.c Fri Jul 20 17:27:02 2007 +0300
@@ -4,6 +4,7 @@
#include "buffer.h"
#include "base64.h"
#include "str.h"
+#include "unichar.h"
#include "charset-utf8.h"
#include "quoted-printable.h"
#include "rfc822-parser.h"
@@ -159,9 +160,8 @@ static bool message_decode_header(struct
value_len = ctx->buf->used;
if (ctx->ucase) {
- charset_utf8_ucase_write(ctx->buf, ctx->buf->used,
- (const unsigned char *)hdr->name,
- hdr->name_len);
+ (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
+ ctx->buf);
buffer_append_c(ctx->buf, '\0');
}
@@ -300,7 +300,8 @@ static bool message_decode_body(struct m
if (ctx->charset_utf8) {
if (ctx->ucase) {
buffer_set_used_size(ctx->buf2, 0);
- charset_utf8_ucase_write(ctx->buf2, 0, data, size);
+ (void)uni_utf8_to_decomposed_titlecase(data, size,
+ ctx->buf);
output->data = ctx->buf2->data;
output->size = ctx->buf2->used;
} else {
diff -r 9afe3fa4858d -r 5f56b2eb32b3 src/lib-mail/message-header-decode.c
--- a/src/lib-mail/message-header-decode.c Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-header-decode.c Fri Jul 20 17:27:02 2007 +0300
@@ -3,6 +3,7 @@
#include "lib.h"
#include "base64.h"
#include "buffer.h"
+#include "unichar.h"
#include "charset-utf8.h"
#include "quoted-printable.h"
#include "message-header-decode.h"
@@ -139,8 +140,8 @@ decode_utf8_callback(const unsigned char
if (charset == NULL || charset_is_utf8(charset)) {
/* ASCII / UTF-8 */
if (ctx->ucase) {
- charset_utf8_ucase_write(ctx->dest, ctx->dest->used,
- data, size);
+ (void)uni_utf8_to_decomposed_titlecase(data, size,
+ ctx->dest);
} else {
buffer_append(ctx->dest, data, size);
}
More information about the dovecot-cvs
mailing list