dovecot: charset_to_utf8_begin() now takes bool ucase parameter. Changed charset_to_ucase_utf8*() to charset_to_utf8*().
dovecot at dovecot.org
dovecot at dovecot.org
Fri Jul 20 06:03:51 EEST 2007
details: http://hg.dovecot.org/dovecot/rev/e5451501ff2f
changeset: 6112:e5451501ff2f
user: Timo Sirainen <tss at iki.fi>
date: Fri Jul 20 06:03:45 2007 +0300
description:
charset_to_utf8_begin() now takes bool ucase parameter. Changed
charset_to_ucase_utf8*() to charset_to_utf8*().
diffstat:
5 files changed, 74 insertions(+), 52 deletions(-)
src/lib-charset/charset-iconv.c | 44 +++++++++++++++++++++---------------
src/lib-charset/charset-utf8.c | 46 ++++++++++++++++++++++++--------------
src/lib-charset/charset-utf8.h | 19 +++++++--------
src/lib-imap/imap-base-subject.c | 4 +--
src/lib-mail/message-decoder.c | 13 +++++-----
diffs (291 lines):
diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-iconv.c Fri Jul 20 06:03:45 2007 +0300
@@ -11,30 +11,32 @@
struct charset_translation {
iconv_t cd;
+ unsigned int ucase:1;
};
-struct charset_translation *charset_to_utf8_begin(const char *charset,
- bool *unknown_charset)
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
{
struct charset_translation *t;
iconv_t cd;
- if (unknown_charset != NULL)
- *unknown_charset = FALSE;
+ if (unknown_charset_r != NULL)
+ *unknown_charset_r = FALSE;
if (charset_is_utf8(charset))
cd = (iconv_t)-1;
else {
cd = iconv_open("UTF-8", charset);
if (cd == (iconv_t)-1) {
- if (unknown_charset != NULL)
- *unknown_charset = TRUE;
+ if (unknown_charset_r != NULL)
+ *unknown_charset_r = TRUE;
return NULL;
}
}
t = i_new(struct charset_translation, 1);
t->cd = cd;
+ t->ucase = ucase;
return t;
}
@@ -56,23 +58,25 @@ void charset_to_utf8_reset(struct charse
}
enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t,
- const unsigned char *src, size_t *src_size,
- buffer_t *dest)
+charset_to_utf8(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size, buffer_t *dest)
{
ICONV_CONST char *ic_srcbuf;
char *ic_destbuf;
size_t srcleft, destpos, destleft, size;
enum charset_result ret;
- destpos = buffer_get_used_size(dest);
+ destpos = dest->used;
destleft = buffer_get_size(dest) - destpos;
if (t->cd == (iconv_t)-1) {
/* no translation needed - just copy it to outbuf uppercased */
if (*src_size > destleft)
*src_size = destleft;
- _charset_utf8_ucase(src, *src_size, dest, destpos);
+ if (t->ucase)
+ _charset_utf8_ucase(src, *src_size, dest, destpos);
+ else
+ buffer_write(dest, destpos, src, *src_size);
return CHARSET_RET_OK;
}
@@ -95,25 +99,29 @@ charset_to_ucase_utf8(struct charset_tra
size -= destleft;
/* give back the memory we didn't use */
- buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft);
+ buffer_set_used_size(dest, dest->used - destleft);
*src_size -= srcleft;
- _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
- dest, destpos);
+ if (t->ucase) {
+ _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
+ dest, destpos);
+ } else {
+ buffer_write(dest, destpos, ic_destbuf - size, size);
+ }
return ret;
}
enum charset_result
-charset_to_ucase_utf8_full(struct charset_translation *t,
- const unsigned char *src, size_t *src_size,
- buffer_t *dest)
+charset_to_utf8_full(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size,
+ buffer_t *dest)
{
enum charset_result ret;
size_t pos, used, size;
for (pos = 0;;) {
size = *src_size - pos;
- ret = charset_to_ucase_utf8(t, src + pos, &size, dest);
+ ret = charset_to_utf8(t, src + pos, &size, dest);
pos += size;
if (ret != CHARSET_RET_OUTPUT_FULL) {
diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-utf8.c Fri Jul 20 06:03:45 2007 +0300
@@ -48,24 +48,25 @@ struct charset_translation {
};
static struct charset_translation ascii_translation, utf8_translation;
+static struct charset_translation ascii_translation_uc, utf8_translation_uc;
-struct charset_translation *charset_to_utf8_begin(const char *charset,
- bool *unknown_charset)
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
{
- if (unknown_charset != NULL)
- *unknown_charset = FALSE;
+ if (unknown_charset_r != NULL)
+ *unknown_charset_r = FALSE;
if (strcasecmp(charset, "us-ascii") == 0 ||
strcasecmp(charset, "ascii") == 0)
- return &ascii_translation;
+ return ucase ? &ascii_translation_uc : &ascii_translation;
if (strcasecmp(charset, "UTF-8") == 0 ||
strcasecmp(charset, "UTF8") == 0)
- return &utf8_translation;
+ return ucase ? &utf8_translation_uc : &utf8_translation;
/* no support for charsets that need translation */
- if (unknown_charset != NULL)
- *unknown_charset = TRUE;
+ if (unknown_charset_r != NULL)
+ *unknown_charset_r = TRUE;
return NULL;
}
@@ -78,19 +79,32 @@ void charset_to_utf8_reset(struct charse
}
enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t __attr_unused__,
- const unsigned char *src, size_t *src_size,
- buffer_t *dest)
+charset_to_utf8(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size, buffer_t *dest)
{
- size_t destpos, destleft;
+ size_t destpos = dest->used, destleft;
- destpos = buffer_get_used_size(dest);
destleft = buffer_get_size(dest) - destpos;
+ if (*src_size > destleft)
+ *src_size = destleft;
/* no translation needed - just copy it to outbuf uppercased */
- if (*src_size > destleft)
- *src_size = destleft;
- _charset_utf8_ucase(src, *src_size, dest, destpos);
+ if (t == &utf8_translation_uc || t == &ascii_translation_uc)
+ _charset_utf8_ucase(src, *src_size, dest, destpos);
+ else
+ buffer_write(dest, destpos, src, *src_size);
+ return CHARSET_RET_OK;
+}
+
+enum charset_result
+charset_to_utf8_full(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size,
+ buffer_t *dest)
+{
+ if (t == &utf8_translation_uc || t == &ascii_translation_uc)
+ _charset_utf8_ucase(src, *src_size, dest, dest->used);
+ else
+ buffer_append(dest, src, *src_size);
return CHARSET_RET_OK;
}
diff -r c83546491bad -r e5451501ff2f src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-charset/charset-utf8.h Fri Jul 20 06:03:45 2007 +0300
@@ -8,9 +8,9 @@ enum charset_result {
CHARSET_RET_INVALID_INPUT = -2
};
-/* Begin translation to UTF-8. */
-struct charset_translation *charset_to_utf8_begin(const char *charset,
- bool *unknown_charset);
+/* Begin translation to UTF-8. If ucase=TRUE, returns data uppercased. */
+struct charset_translation *
+charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r);
void charset_to_utf8_end(struct charset_translation **t);
@@ -22,15 +22,14 @@ bool charset_is_utf8(const char *charset
/* Translate src to UTF-8. src_size is updated to contain the number of
characters actually translated from src. Note that dest buffer is used
only up to its current size, for growing it automatically use
- charset_to_ucase_utf8_full(). */
+ charset_to_utf8_full(). */
enum charset_result
-charset_to_ucase_utf8(struct charset_translation *t,
- const unsigned char *src, size_t *src_size,
- buffer_t *dest);
+charset_to_utf8(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size, buffer_t *dest);
enum charset_result
-charset_to_ucase_utf8_full(struct charset_translation *t,
- const unsigned char *src, size_t *src_size,
- buffer_t *dest);
+charset_to_utf8_full(struct charset_translation *t,
+ const unsigned char *src, size_t *src_size,
+ buffer_t *dest);
/* Simple wrappers for above functions. If utf8_size is non-NULL, it's set
to same as strlen(returned data). */
diff -r c83546491bad -r e5451501ff2f src/lib-imap/imap-base-subject.c
--- a/src/lib-imap/imap-base-subject.c Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-imap/imap-base-subject.c Fri Jul 20 06:03:45 2007 +0300
@@ -22,9 +22,9 @@ static bool header_decode(const unsigned
/* It's ASCII. */
buffer_append(buf, data, size);
} else {
- t = charset_to_utf8_begin(charset, NULL);
+ t = charset_to_utf8_begin(charset, TRUE, NULL);
if (t != NULL) {
- (void)charset_to_ucase_utf8(t, data, &size, buf);
+ (void)charset_to_utf8(t, data, &size, buf);
charset_to_utf8_end(&t);
}
}
diff -r c83546491bad -r e5451501ff2f src/lib-mail/message-decoder.c
--- a/src/lib-mail/message-decoder.c Fri Jul 20 02:45:32 2007 +0300
+++ b/src/lib-mail/message-decoder.c Fri Jul 20 06:03:45 2007 +0300
@@ -83,14 +83,14 @@ message_decode_header_callback(const uns
return TRUE;
}
- t = charset_to_utf8_begin(charset, &unknown_charset);
+ t = charset_to_utf8_begin(charset, TRUE, &unknown_charset);
if (unknown_charset) {
/* let's just ignore this part */
return TRUE;
}
/* ignore any errors */
- (void)charset_to_ucase_utf8_full(t, data, &size, ctx->buf);
+ (void)charset_to_utf8_full(t, data, &size, ctx->buf);
charset_to_utf8_end(&t);
return TRUE;
}
@@ -199,8 +199,8 @@ static void translation_buf_decode(struc
memcpy(trans_buf + ctx->translation_size, data, skip);
pos = *size;
- (void)charset_to_ucase_utf8_full(ctx->charset_trans,
- *data, &pos, ctx->buf2);
+ (void)charset_to_utf8_full(ctx->charset_trans,
+ *data, &pos, ctx->buf2);
i_assert(pos > ctx->translation_size);
skip = (ctx->translation_size + skip) - pos;
@@ -226,6 +226,7 @@ static bool message_decode_body(struct m
ctx->charset_trans =
charset_to_utf8_begin(ctx->content_charset != NULL ?
ctx->content_charset : "UTF-8",
+ TRUE,
&unknown_charset);
}
@@ -317,8 +318,8 @@ static bool message_decode_body(struct m
translation_buf_decode(ctx, &data, &size);
pos = size;
- (void)charset_to_ucase_utf8_full(ctx->charset_trans,
- data, &pos, ctx->buf2);
+ (void)charset_to_utf8_full(ctx->charset_trans,
+ data, &pos, ctx->buf2);
if (pos != size) {
ctx->translation_size = size - pos;
i_assert(ctx->translation_size <=
More information about the dovecot-cvs mailing list