dovecot-2.2: lib-http: Improved message header and body parsing ...

dovecot at dovecot.org dovecot at dovecot.org
Sun Sep 15 03:50:34 EEST 2013


details:   http://hg.dovecot.org/dovecot-2.2/rev/d59ac8efc5af
changeset: 16743:d59ac8efc5af
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Sun Sep 15 03:39:45 2013 +0300
description:
lib-http: Improved message header and body parsing for better RFC compliance.
The parsed message struct now carries the pre-parsed Transfer-Encoding and
Connection header content (as arrays). Fixed message body handling for the
case where both the Transfer-Encoding and Content-Length headers are missing.
Duplicates of important headers that must be unique (Content-Length, Date)
now yield an error.

diffstat:

 src/lib-http/http-message-parser.c  |  247 +++++++++++++++++++++++++++++++----
 src/lib-http/http-message-parser.h  |    7 +-
 src/lib-http/http-request-parser.c  |    3 +-
 src/lib-http/http-request-parser.h  |    1 +
 src/lib-http/http-response-parser.c |    3 +-
 src/lib-http/http-response.h        |    1 +
 src/lib-http/http-transfer.h        |   14 ++
 7 files changed, 241 insertions(+), 35 deletions(-)

diffs (truncated from 438 to 300 lines):

diff -r 1404dbde402c -r d59ac8efc5af src/lib-http/http-message-parser.c
--- a/src/lib-http/http-message-parser.c	Sun Sep 15 03:37:59 2013 +0300
+++ b/src/lib-http/http-message-parser.c	Sun Sep 15 03:39:45 2013 +0300
@@ -49,6 +49,7 @@
 	}
 	parser->msg.date = (time_t)-1;
 	p_array_init(&parser->msg.headers, parser->msg.pool, 32);
+	p_array_init(&parser->msg.connection_options, parser->msg.pool, 4);
 }
 
 int http_message_parse_version(struct http_message_parser *parser)
@@ -106,10 +107,24 @@
 	memcpy(value, data, size);
 	hdr->size = size;
 
+	/* https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+	     Section 3.2.2:
+
+	   A sender MUST NOT generate multiple header fields with the same field
+	   name in a message unless either the entire field value for that
+	   header field is defined as a comma-separated list [i.e., #(values)]
+	   or the header field is a well-known exception.
+	 */
+
 	switch (name[0]) {
 	case 'C': case 'c':
+		/* Connection: */
 		if (strcasecmp(name, "Connection") == 0) {
+			const char **opt_idx;
 			const char *option;
+			unsigned int num_tokens = 0;
+
+			/* Multiple Connection headers are allowed and combined into one */
 
 			/* Connection        = 1#connection-option
 				 connection-option = token
@@ -118,24 +133,42 @@
 			for (;;) {
 				if (http_parse_token_list_next(&hparser, &option) <= 0)
 					break;
-				if (strcasecmp(option, "close") == 0) {
+				num_tokens++;
+				if (strcasecmp(option, "close") == 0)
 					parser->msg.connection_close = TRUE;
-					break; // not interested in any other options
-				}
+				opt_idx = array_append_space(&parser->msg.connection_options);
+				*opt_idx = p_strdup(parser->msg.pool, option);
 			}
+
+			if (hparser.cur < hparser.end || num_tokens == 0) {
+				*error_r = "Invalid Connection header";
+				return -1;
+			}
+
 			return 0;
 		}
+		/* Content-Length: */
 		if (strcasecmp(name, "Content-Length") == 0) {
+			if (parser->msg.have_content_length) {
+				*error_r = "Duplicate Content-Length header";
+				return -1;
+			}
 			/* Content-Length = 1*DIGIT */
 			if (str_to_uoff(hdr->value, &parser->msg.content_length) < 0) {
 				*error_r = "Invalid Content-Length header";
 				return -1;
 			}
+			parser->msg.have_content_length = TRUE;
 			return 0;
 		}
 		break;
 	case 'D': case 'd':
 		if (strcasecmp(name, "Date") == 0) {
+			if (parser->msg.date != (time_t)-1) {
+				*error_r = "Duplicate Date header";
+				return -1;
+			}
+
 			/* Date = HTTP-date */
 			(void)http_date_parse(data, size, &parser->msg.date);
 			return 0;
@@ -143,15 +176,102 @@
 		break;
 	case 'L': case 'l':
 		if (strcasecmp(name, "Location") == 0) {
+			/* FIXME: move this to response parser */
 			/* Location = URI-reference (not parsed here) */
 			parser->msg.location = hdr->value;
 			return 0;
 		}
 		break;
 	case 'T': case 't':
+		/* Transfer-Encoding: */
 		if (strcasecmp(name, "Transfer-Encoding") == 0) {
-			/* Transfer-Encoding = 1#transfer-coding */
-			parser->msg.transfer_encoding = hdr->value;
+			const char *trenc = NULL;
+	
+			/* Multiple Transfer-Encoding headers are allowed and combined into one */
+			if (!array_is_created(&parser->msg.transfer_encoding))
+				p_array_init(&parser->msg.transfer_encoding, parser->msg.pool, 4);
+
+			/* Transfer-Encoding  = 1#transfer-coding 
+				 transfer-coding    = "chunked" / "compress" / "deflate" / "gzip"
+				                      / transfer-extension
+				 transfer-extension = token *( OWS ";" OWS transfer-parameter )
+				 transfer-parameter = attribute BWS "=" BWS value
+				 attribute          = token
+				 value              = word
+			*/
+			http_parser_init(&hparser, data, size);
+			for (;;) {
+				/* transfer-coding */
+				if (http_parse_token(&hparser, &trenc) > 0) {
+					struct http_transfer_coding *coding;
+					bool parse_error;
+
+					coding = array_append_space(&parser->msg.transfer_encoding);
+					coding->name = p_strdup(parser->msg.pool, trenc);
+		
+					/* *( OWS ";" OWS transfer-parameter ) */
+					parse_error = FALSE;
+					for (;;) {
+						struct http_transfer_param *param;
+						const char *attribute, *value;
+
+						/* OWS ";" OWS */
+						http_parse_ows(&hparser);
+						if (hparser.cur >= hparser.end || *hparser.cur != ';')
+							break;
+						hparser.cur++;
+						http_parse_ows(&hparser);
+
+						/* attribute */
+						if (http_parse_token(&hparser, &attribute) <= 0) {
+							parse_error = TRUE;
+							break;
+						}
+
+						/* BWS "=" BWS */
+						http_parse_ows(&hparser);
+						if (hparser.cur >= hparser.end || *hparser.cur != '=') {
+							parse_error = TRUE;
+							break;
+						}
+						hparser.cur++;
+						http_parse_ows(&hparser);
+
+						/* value */
+						if (http_parse_word(&hparser, &value) <= 0) {
+							parse_error = TRUE;
+							break;
+						}
+		
+						if (!array_is_created(&coding->parameters))
+							p_array_init(&coding->parameters, parser->msg.pool, 2);
+						param = array_append_space(&coding->parameters);
+						param->attribute = p_strdup(parser->msg.pool, attribute);
+						param->value = p_strdup(parser->msg.pool, value);
+					}
+					if (parse_error)
+						break;
+					
+				} else {
+					/* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+							 Appendix B:
+
+						 For compatibility with legacy list rules, recipients SHOULD accept
+						 empty list elements.
+					 */
+				}
+				http_parse_ows(&hparser);
+				if (hparser.cur >= hparser.end || *hparser.cur != ',')
+					break;
+				hparser.cur++;
+				http_parse_ows(&hparser);
+			}
+
+			if (hparser.cur < hparser.end ||
+				array_count(&parser->msg.transfer_encoding) == 0) {
+				*error_r = "Invalid Transfer-Encoding header";
+				return -1;
+			}
 			return 0;
 		}
 		break;
@@ -188,40 +308,105 @@
 	return ret;
 }
 
-int http_message_parse_body(struct http_message_parser *parser,
+
+int http_message_parse_body(struct http_message_parser *parser, bool request,
 			    const char **error_r)
 {
-	struct http_parser hparser;
+	*error_r = NULL;
 
-	if (parser->msg.content_length > 0) {
+	if (array_is_created(&parser->msg.transfer_encoding)) {
+		const struct http_transfer_coding *coding;
+
+		bool chunked_last = FALSE;
+
+		array_foreach(&parser->msg.transfer_encoding, coding) {
+			if (strcasecmp(coding->name, "chunked") == 0) {
+				chunked_last = TRUE;
+		
+				if (*error_r == NULL && array_is_created(&coding->parameters) &&
+					array_count(&coding->parameters) > 0) {
+					const struct http_transfer_param *param =
+						array_idx(&coding->parameters, 0);
+					
+					*error_r = t_strdup_printf("Unexpected parameter `%s' specified"
+						"for the `%s' transfer coding", param->attribute, coding->name);
+				}
+			} else if (chunked_last) {
+				*error_r = "Chunked Transfer-Encoding must be last";
+				return -1;
+			} else if (*error_r == NULL) {
+ 				*error_r = t_strdup_printf(
+  				"Unknown transfer coding `%s'", coding->name);
+  		}
+  	}
+
+		if (chunked_last) {	
+			parser->payload =
+				http_transfer_chunked_istream_create(parser->input);
+		} else if (!request) {
+			/*  https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+			      Section 3.3.3.:
+
+			    If a Transfer-Encoding header field is present in a response and
+			    the chunked transfer coding is not the final encoding, the
+			    message body length is determined by reading the connection until
+			    it is closed by the server.
+			 */
+			parser->payload =
+					i_stream_create_limit(parser->input, (size_t)-1);
+		} else {
+			/* https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+			      Section 3.3.3.:
+
+			   If a Transfer-Encoding header field is present in a request and the
+			   chunked transfer coding is not the final encoding, the message body
+			   length cannot be determined reliably; the server MUST respond with
+			   the 400 (Bad Request) status code and then close the connection.
+			 */
+			*error_r = "Final Transfer-Encoding in request is not `chunked'";
+			return -1;
+		}
+
+		/* https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+		     Section 3.3.3.:
+
+			 If a message is received with both a Transfer-Encoding and a
+       Content-Length header field, the Transfer-Encoding overrides the
+       Content-Length.  Such a message might indicate an attempt to
+       perform request or response smuggling (bypass of security-related
+       checks on message routing or content) and thus ought to be
+       handled as an error.  A sender MUST remove the received Content-
+       Length field prior to forwarding such a message downstream.
+		 */
+		if (parser->msg.have_content_length) {
+			ARRAY_TYPE(http_response_header) *headers = &parser->msg.headers;
+			const struct http_response_header *hdr;
+
+			array_foreach(headers, hdr) {
+				if (strcasecmp(hdr->key, "Content-Length") == 0) {
+					array_delete(headers, array_foreach_idx(headers, hdr), 1);
+					break;
+				}
+			}
+		}
+	} else if (parser->msg.content_length > 0) {
 		/* Got explicit message size from Content-Length: header */
 		parser->payload =
 			i_stream_create_limit(parser->input,
 					      parser->msg.content_length);
-	} else if (parser->msg.transfer_encoding != NULL) {
-		const char *tenc;
+	} else if (!parser->msg.have_content_length && !request) {
+		/* https://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+		     Section 3.3.3.:
 
-		/* Transfer-Encoding = 1#transfer-coding
-		   transfer-coding    = "chunked" / "compress" / "deflate" / "gzip"
-				      / transfer-extension       ;  [FIXME]
-		   transfer-extension = token *( OWS ";" OWS transfer-parameter )
-		*/
-		http_parser_init(&hparser,
-				 (const unsigned char *)parser->msg.transfer_encoding,
-				 strlen(parser->msg.transfer_encoding));
-		for (;;) {
-			if (http_parse_token_list_next(&hparser, &tenc) <= 0)
-				break;
-			if (strcasecmp(tenc, "chunked") == 0) {
-				parser->payload =
-					http_transfer_chunked_istream_create(parser->input);


More information about the dovecot-cvs mailing list