dovecot-2.0-pigeonhole: ManageSieve: now using Dovecot API for UTF-8 validity checks.

pigeonhole at rename-it.nl pigeonhole at rename-it.nl
Fri Feb 18 15:21:14 EET 2011


details:   http://hg.rename-it.nl/dovecot-2.0-pigeonhole/rev/bd894c3cbf7b
changeset: 1478:bd894c3cbf7b
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Fri Feb 18 14:03:28 2011 +0100
description:
ManageSieve: now using Dovecot API for UTF-8 validity checks.

diffstat:

 src/lib-managesieve/managesieve-parser.c |  70 +++++------------------
 src/lib-managesieve/managesieve-parser.h |  41 -------------
 src/lib-managesieve/managesieve-quote.c  |  87 ++++------------------------
 3 files changed, 30 insertions(+), 168 deletions(-)

diffs (truncated from 329 to 300 lines):

diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-parser.c
--- a/src/lib-managesieve/managesieve-parser.c	Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-parser.c	Fri Feb 18 14:03:28 2011 +0100
@@ -2,6 +2,7 @@
  */
 
 #include "lib.h"
+#include "unichar.h"
 #include "istream.h"
 #include "ostream.h"
 #include "strescape.h"
@@ -60,7 +61,7 @@
 	parser->cur_list = LIST_REALLOC(parser, parser->cur_list, size);
 	parser->cur_list->alloc = size;
 
-  parser->root_list = parser->cur_list;
+	parser->root_list = parser->cur_list;
 }
 
 struct managesieve_parser *
@@ -70,7 +71,7 @@
 	struct managesieve_parser *parser;
 
 	parser = i_new(struct managesieve_parser, 1);
-        parser->pool = pool_alloconly_create("MANAGESIEVE parser", 8192);
+	parser->pool = pool_alloconly_create("MANAGESIEVE parser", 8192);
 	parser->input = input;
 	parser->output = output;
 	parser->max_line_size = max_line_size;
@@ -237,7 +238,6 @@
 				   const unsigned char *data, size_t data_size)
 {
 	size_t i;
-	int utf8_len;
 
 	/* QUOTED-CHAR        = SAFE-UTF8-CHAR / "\" QUOTED-SPECIALS
 	 * quoted             = <"> *QUOTED-CHAR <">
@@ -247,8 +247,13 @@
 	/* read until we've found non-escaped ", CR or LF */
 	for (i = parser->cur_pos; i < data_size; i++) {
 		if (data[i] == '"') {
+
+			if ( !uni_utf8_data_is_valid(data+1, i-1) ) {
+				parser->error = "Invalid UTF-8 character in quoted-string.";
+				return FALSE;
+			}
+
 			managesieve_parser_save_arg(parser, data, i);
-
 			i++; /* skip the trailing '"' too */
 			break;
 		}
@@ -275,59 +280,10 @@
 			continue;
 		}
 
-		/* Enforce valid UTF-8
-		 */
-		if ( (utf8_len = UTF8_LEN(data[i])) == 0 ) {
+		if ( !IS_SAFE_CHAR(data[i]) ) {
 			parser->error = "String contains invalid character.";
 			return FALSE;
 		}
-		
-		if ( utf8_len > 1 ) {
-			bool overlong = FALSE;
-
-			if ( (i+utf8_len-1) >= data_size ) {
-				/* Known data ends in the middle of a UTF-8 character;
-				 * leave it to next time.
-				 */
-				break;
-			}
-
-			/* Check for overlong UTF-8 sequences */
-			switch (utf8_len) {
-			case 2:
-				if (!(data[i] & 0x1E)) overlong = TRUE;
-				break;
-			case 3:	
-				if (!(data[i] & 0x0F) && !(data[i+1] & 0x20)) overlong = TRUE;
-				break;
-			case 4:
-				if (!(data[i] & 0x07) && !(data[i+1] & 0x30)) overlong = TRUE;				
-				break;
-			case 5:
-				if (!(data[i] & 0x03) && !(data[i+1] & 0x38)) overlong = TRUE;
-				break;				
-			case 6:
-				if (!(data[i] & 0x01) && !(data[i+1] & 0x3C)) overlong = TRUE;
-				break;				
-			default:
-				i_unreached();
-			} 
-
-			if ( overlong ) {
-				parser->error = "String contains invalid/overlong UTF-8 character.";
-				return FALSE;
-			}
-
-			utf8_len--;
-	
-			/* Parse the series of UTF8_1 characters */
-			for (; utf8_len > 0; utf8_len--, i++ ) {  
-				if (!IS_UTF8_1(data[i+1])) {
-					parser->error = "String contains invalid UTF-8 character.";
-			    return FALSE;
-				}
-			}
-		}
 	}
 
 	parser->cur_pos = i;
@@ -436,6 +392,12 @@
 		if (data_size < parser->literal_size) {
 			return FALSE;
 		} else {
+			if ( !uni_utf8_data_is_valid
+				(data, (size_t)parser->literal_size) ) {
+				parser->error = "Invalid UTF-8 character in literal string.";
+				return FALSE;
+			}
+			
 			managesieve_parser_save_arg(parser, data,
 					     (size_t)parser->literal_size);
 			parser->cur_pos = (size_t)parser->literal_size;
diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-parser.h
--- a/src/lib-managesieve/managesieve-parser.h	Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-parser.h	Fri Feb 18 14:03:28 2011 +0100
@@ -39,47 +39,6 @@
 #define IS_SAFE_CHAR(c) \
 	(IS_TEXT_CHAR(c) && !IS_QUOTED_SPECIAL(c))
 
-/* UTF8-1             = %x80-BF
- */
-#define IS_UTF8_1(c) \
-	(((c) & 0xC0) == 0x80)
-
-/* UTF8-2             = %xC0-DF UTF8-1
- */
-#define IS_UTF8_2S(c) \
-  (((c) & 0xE0) == 0xC0)
-
-/* UTF8-3             = %xE0-EF 2UTF8-1
- */
-#define IS_UTF8_3S(c) \
-  (((c) & 0xF0) == 0xE0)
-
-/* UTF8-4             = %xF0-F7 3UTF8-1
- */
-#define IS_UTF8_4S(c) \
-  (((c) & 0xF8) == 0xF0)
-
-/* UTF8-5             = %xF8-FB 4UTF8-1
- */
-#define IS_UTF8_5S(c) \
-  (((c) & 0xFC) == 0xF8)
-
-/* UTF8-6             = %xFC-FD 5UTF8-1
- */
-#define IS_UTF8_6S(c) \
-  (((c) & 0xFE) == 0xFC)
-
-/* SAFE-UTF8-CHAR     = SAFE-CHAR / UTF8-2 / UTF8-3 / UTF8-4 /
- *                      UTF8-5 / UTF8-6
- */
-#define UTF8_LEN(c) \
-  ( IS_SAFE_CHAR(c) ? 1 : \
-    IS_UTF8_2S(c) ? 2 : \
-    IS_UTF8_3S(c) ? 3 : \
-    IS_UTF8_4S(c) ? 4 : \
-    IS_UTF8_5S(c) ? 5 : \
-    IS_UTF8_6S(c) ? 6 : 0 )
-
 enum managesieve_parser_flags {
 	/* Set this flag if you wish to read only size of literal argument
 	   and not convert literal into string. Useful when you need to deal
diff -r 92fa68d19585 -r bd894c3cbf7b src/lib-managesieve/managesieve-quote.c
--- a/src/lib-managesieve/managesieve-quote.c	Fri Feb 18 02:58:43 2011 +0100
+++ b/src/lib-managesieve/managesieve-quote.c	Fri Feb 18 14:03:28 2011 +0100
@@ -3,6 +3,7 @@
 
 #include "lib.h"
 #include "str.h"
+#include "unichar.h"
 #include "managesieve-parser.h"
 #include "managesieve-quote.h"
 
@@ -14,13 +15,11 @@
 void managesieve_quote_append(string_t *str, const unsigned char *value,
 		       size_t value_len, bool compress_lwsp)
 {
-	size_t i, extra = 0;
+	size_t i, extra = 0, escape = 0;
 	bool 
 		last_lwsp = TRUE, 
 		literal = FALSE, 
-		modify = FALSE,
-		escape = FALSE;
-	int utf8_len;
+		modify = FALSE;
 
  	if (value == NULL) {
 		str_append(str, "\"\"");
@@ -42,7 +41,7 @@
 			break;
 		case '"':
 		case '\\':
-			escape = TRUE;
+			escape++;
 			last_lwsp = FALSE;
 			break;
 		case 13:
@@ -51,36 +50,6 @@
 			last_lwsp = TRUE;
 			break;
 		default:
-			/* Enforce valid UTF-8
-			 */
-			if ( (utf8_len=UTF8_LEN(value[i])) == 0 ) {
-				modify = TRUE;
-				extra++;
-				break;
-			}
-
-			if ( utf8_len > 1 ) {
-				int c = utf8_len - 1;
-
-		 		if ( (i+utf8_len-1) >= value_len ) {
-				  	/* Value ends in the middle of a UTF-8 character;
-					 * Kill the partial UTF-8 character
-					 */
-				  	extra += i + utf8_len - value_len;
-					modify = TRUE;
-					break;        	
-				}
-
-				/* Parse the series of UTF8_1 characters */
-				for (i++; c > 0; c--, i++ ) {
-					if (!IS_UTF8_1(value[i])) {
-						extra += utf8_len - c;
-						modify = TRUE;
-						break;
-					}
-				}
-			}
-   			
 			last_lwsp = FALSE;
 		}
 	}
@@ -93,9 +62,10 @@
 		str_printfa(str, "{%"PRIuSIZE_T"}\r\n", value_len - extra);
 	}
 
-	if (!modify && (literal || !escape))
+	if (!modify && (literal || escape == 0))
 		str_append_n(str, value, value_len);
 	else {
+		string_t *unchecked = t_str_new(value_len+escape+4);
 		last_lwsp = TRUE;
 		for (i = 0; i < value_len; i++) {
 			switch (value[i]) {
@@ -103,58 +73,29 @@
 			case '\\':
 				last_lwsp = FALSE;
 				if (!literal) 
-					str_append_c(str, '\\');
-				str_append_c(str, value[i]);
+					str_append_c(unchecked, '\\');
+				str_append_c(unchecked, value[i]);
 				break;
 			case ' ':
 			case '\t':
 				if (!last_lwsp || !compress_lwsp)
-					str_append_c(str, ' ');
+					str_append_c(unchecked, ' ');
 				last_lwsp = TRUE;
 				break;
 			case 13:
 			case 10:
 				last_lwsp = TRUE;
-				str_append_c(str, value[i]);
+				str_append_c(unchecked, value[i]);
 				break;
 			default:
-	  			/* Enforce valid UTF-8
-				 */
-				if ( (utf8_len=UTF8_LEN(value[i])) == 0 ) 
-					break;
-      
-				if ( utf8_len > 1 ) {
-					int c = utf8_len - 1;
-					int j;
-
-					if ( (i+utf8_len-1) >= value_len ) {
-						/* Value ends in the middle of a UTF-8 character;
-						 * Kill the partial character
-						 */
-					 	i = value_len;
-						break;


More information about the dovecot-cvs mailing list