dovecot-2.0-pigeonhole: ManageSieve: now using Dovecot API for UTF-8 validity checks.
pigeonhole at rename-it.nl
pigeonhole at rename-it.nl
Fri Feb 18 15:21:14 EET 2011
details: http://hg.rename-it.nl/dovecot-2.0-pigeonhole/rev/bd894c3cbf7b
changeset: 1478:bd894c3cbf7b
user: Stephan Bosch <stephan at rename-it.nl>
date: Fri Feb 18 14:03:28 2011 +0100
description:
ManageSieve: now using Dovecot API for UTF-8 validity checks.
diffstat:
src/lib-managesieve/managesieve-parser.c | 70 +++++------------------
src/lib-managesieve/managesieve-parser.h | 41 -------------
src/lib-managesieve/managesieve-quote.c | 87 ++++------------------------
3 files changed, 30 insertions(+), 168 deletions(-)
diffs (truncated from 329 to 300 lines):
diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-parser.c
--- a/src/lib-managesieve/managesieve-parser.c Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-parser.c Fri Feb 18 14:03:28 2011 +0100
@@ -2,6 +2,7 @@
*/
#include "lib.h"
+#include "unichar.h"
#include "istream.h"
#include "ostream.h"
#include "strescape.h"
@@ -60,7 +61,7 @@
parser->cur_list = LIST_REALLOC(parser, parser->cur_list, size);
parser->cur_list->alloc = size;
- parser->root_list = parser->cur_list;
+ parser->root_list = parser->cur_list;
}
struct managesieve_parser *
@@ -70,7 +71,7 @@
struct managesieve_parser *parser;
parser = i_new(struct managesieve_parser, 1);
- parser->pool = pool_alloconly_create("MANAGESIEVE parser", 8192);
+ parser->pool = pool_alloconly_create("MANAGESIEVE parser", 8192);
parser->input = input;
parser->output = output;
parser->max_line_size = max_line_size;
@@ -237,7 +238,6 @@
const unsigned char *data, size_t data_size)
{
size_t i;
- int utf8_len;
/* QUOTED-CHAR = SAFE-UTF8-CHAR / "\" QUOTED-SPECIALS
* quoted = <"> *QUOTED-CHAR <">
@@ -247,8 +247,13 @@
/* read until we've found non-escaped ", CR or LF */
for (i = parser->cur_pos; i < data_size; i++) {
if (data[i] == '"') {
+
+ if ( !uni_utf8_data_is_valid(data+1, i-1) ) {
+ parser->error = "Invalid UTF-8 character in quoted-string.";
+ return FALSE;
+ }
+
managesieve_parser_save_arg(parser, data, i);
-
i++; /* skip the trailing '"' too */
break;
}
@@ -275,59 +280,10 @@
continue;
}
- /* Enforce valid UTF-8
- */
- if ( (utf8_len = UTF8_LEN(data[i])) == 0 ) {
+ if ( !IS_SAFE_CHAR(data[i]) ) {
parser->error = "String contains invalid character.";
return FALSE;
}
-
- if ( utf8_len > 1 ) {
- bool overlong = FALSE;
-
- if ( (i+utf8_len-1) >= data_size ) {
- /* Known data ends in the middle of a UTF-8 character;
- * leave it to next time.
- */
- break;
- }
-
- /* Check for overlong UTF-8 sequences */
- switch (utf8_len) {
- case 2:
- if (!(data[i] & 0x1E)) overlong = TRUE;
- break;
- case 3:
- if (!(data[i] & 0x0F) && !(data[i+1] & 0x20)) overlong = TRUE;
- break;
- case 4:
- if (!(data[i] & 0x07) && !(data[i+1] & 0x30)) overlong = TRUE;
- break;
- case 5:
- if (!(data[i] & 0x03) && !(data[i+1] & 0x38)) overlong = TRUE;
- break;
- case 6:
- if (!(data[i] & 0x01) && !(data[i+1] & 0x3C)) overlong = TRUE;
- break;
- default:
- i_unreached();
- }
-
- if ( overlong ) {
- parser->error = "String contains invalid/overlong UTF-8 character.";
- return FALSE;
- }
-
- utf8_len--;
-
- /* Parse the series of UTF8_1 characters */
- for (; utf8_len > 0; utf8_len--, i++ ) {
- if (!IS_UTF8_1(data[i+1])) {
- parser->error = "String contains invalid UTF-8 character.";
- return FALSE;
- }
- }
- }
}
parser->cur_pos = i;
@@ -436,6 +392,12 @@
if (data_size < parser->literal_size) {
return FALSE;
} else {
+ if ( !uni_utf8_data_is_valid
+ (data, (size_t)parser->literal_size) ) {
+ parser->error = "Invalid UTF-8 character in literal string.";
+ return FALSE;
+ }
+
managesieve_parser_save_arg(parser, data,
(size_t)parser->literal_size);
parser->cur_pos = (size_t)parser->literal_size;
diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-parser.h
--- a/src/lib-managesieve/managesieve-parser.h Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-parser.h Fri Feb 18 14:03:28 2011 +0100
@@ -39,47 +39,6 @@
#define IS_SAFE_CHAR(c) \
(IS_TEXT_CHAR(c) && !IS_QUOTED_SPECIAL(c))
-/* UTF8-1 = %x80-BF
- */
-#define IS_UTF8_1(c) \
- (((c) & 0xC0) == 0x80)
-
-/* UTF8-2 = %xC0-DF UTF8-1
- */
-#define IS_UTF8_2S(c) \
- (((c) & 0xE0) == 0xC0)
-
-/* UTF8-3 = %xE0-EF 2UTF8-1
- */
-#define IS_UTF8_3S(c) \
- (((c) & 0xF0) == 0xE0)
-
-/* UTF8-4 = %xF0-F7 3UTF8-1
- */
-#define IS_UTF8_4S(c) \
- (((c) & 0xF8) == 0xF0)
-
-/* UTF8-5 = %xF8-FB 4UTF8-1
- */
-#define IS_UTF8_5S(c) \
- (((c) & 0xFC) == 0xF8)
-
-/* UTF8-6 = %xFC-FD 5UTF8-1
- */
-#define IS_UTF8_6S(c) \
- (((c) & 0xFE) == 0xFC)
-
-/* SAFE-UTF8-CHAR = SAFE-CHAR / UTF8-2 / UTF8-3 / UTF8-4 /
- * UTF8-5 / UTF8-6
- */
-#define UTF8_LEN(c) \
- ( IS_SAFE_CHAR(c) ? 1 : \
- IS_UTF8_2S(c) ? 2 : \
- IS_UTF8_3S(c) ? 3 : \
- IS_UTF8_4S(c) ? 4 : \
- IS_UTF8_5S(c) ? 5 : \
- IS_UTF8_6S(c) ? 6 : 0 )
-
enum managesieve_parser_flags {
/* Set this flag if you wish to read only size of literal argument
and not convert literal into string. Useful when you need to deal
diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-quote.c
--- a/src/lib-managesieve/managesieve-quote.c Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-quote.c Fri Feb 18 14:03:28 2011 +0100
@@ -3,6 +3,7 @@
#include "lib.h"
#include "str.h"
+#include "unichar.h"
#include "managesieve-parser.h"
#include "managesieve-quote.h"
@@ -14,13 +15,11 @@
void managesieve_quote_append(string_t *str, const unsigned char *value,
size_t value_len, bool compress_lwsp)
{
- size_t i, extra = 0;
+ size_t i, extra = 0, escape = 0;
bool
last_lwsp = TRUE,
literal = FALSE,
- modify = FALSE,
- escape = FALSE;
- int utf8_len;
+ modify = FALSE;
if (value == NULL) {
str_append(str, "\"\"");
@@ -42,7 +41,7 @@
break;
case '"':
case '\\':
- escape = TRUE;
+ escape++;
last_lwsp = FALSE;
break;
case 13:
@@ -51,36 +50,6 @@
last_lwsp = TRUE;
break;
default:
- /* Enforce valid UTF-8
- */
- if ( (utf8_len=UTF8_LEN(value[i])) == 0 ) {
- modify = TRUE;
- extra++;
- break;
- }
-
- if ( utf8_len > 1 ) {
- int c = utf8_len - 1;
-
- if ( (i+utf8_len-1) >= value_len ) {
- /* Value ends in the middle of a UTF-8 character;
- * Kill the partial UTF-8 character
- */
- extra += i + utf8_len - value_len;
- modify = TRUE;
- break;
- }
-
- /* Parse the series of UTF8_1 characters */
- for (i++; c > 0; c--, i++ ) {
- if (!IS_UTF8_1(value[i])) {
- extra += utf8_len - c;
- modify = TRUE;
- break;
- }
- }
- }
-
last_lwsp = FALSE;
}
}
@@ -93,9 +62,10 @@
str_printfa(str, "{%"PRIuSIZE_T"}\r\n", value_len - extra);
}
- if (!modify && (literal || !escape))
+ if (!modify && (literal || escape == 0))
str_append_n(str, value, value_len);
else {
+ string_t *unchecked = t_str_new(value_len+escape+4);
last_lwsp = TRUE;
for (i = 0; i < value_len; i++) {
switch (value[i]) {
@@ -103,58 +73,29 @@
case '\\':
last_lwsp = FALSE;
if (!literal)
- str_append_c(str, '\\');
- str_append_c(str, value[i]);
+ str_append_c(unchecked, '\\');
+ str_append_c(unchecked, value[i]);
break;
case ' ':
case '\t':
if (!last_lwsp || !compress_lwsp)
- str_append_c(str, ' ');
+ str_append_c(unchecked, ' ');
last_lwsp = TRUE;
break;
case 13:
case 10:
last_lwsp = TRUE;
- str_append_c(str, value[i]);
+ str_append_c(unchecked, value[i]);
break;
default:
- /* Enforce valid UTF-8
- */
- if ( (utf8_len=UTF8_LEN(value[i])) == 0 )
- break;
-
- if ( utf8_len > 1 ) {
- int c = utf8_len - 1;
- int j;
-
- if ( (i+utf8_len-1) >= value_len ) {
- /* Value ends in the middle of a UTF-8 character;
- * Kill the partial character
- */
- i = value_len;
- break;
More information about the dovecot-cvs mailing list