dovecot-2.2: lib-http: Added support for parsing request target URLs.

dovecot at dovecot.org dovecot at dovecot.org
Sun Sep 15 03:50:34 EEST 2013


details:   http://hg.dovecot.org/dovecot-2.2/rev/29ceb7126b91
changeset: 16747:29ceb7126b91
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Sun Sep 15 03:47:29 2013 +0300
description:
lib-http: Added support for parsing request target URLs.

diffstat:

 src/lib-http/http-request.h |   14 ++
 src/lib-http/http-url.c     |  286 ++++++++++++++++++++++++++++++++++---------
 src/lib-http/http-url.h     |    6 +
 3 files changed, 245 insertions(+), 61 deletions(-)

diffs (truncated from 435 to 300 lines):

diff -r bbe4a469e276 -r 29ceb7126b91 src/lib-http/http-request.h
--- a/src/lib-http/http-request.h	Sun Sep 15 03:46:25 2013 +0300
+++ b/src/lib-http/http-request.h	Sun Sep 15 03:47:29 2013 +0300
@@ -3,6 +3,20 @@
 
 #include "http-header.h"
 
+struct http_url;
+
+enum http_request_target_format {
+	HTTP_REQUEST_TARGET_FORMAT_ORIGIN = 0,
+	HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE,
+	HTTP_REQUEST_TARGET_FORMAT_AUTHORITY,
+	HTTP_REQUEST_TARGET_FORMAT_ASTERISK
+};
+
+struct http_request_target {
+	enum http_request_target_format format;
+	struct http_url *url;
+};
+
 struct http_request {
 	const char *method;
 
diff -r bbe4a469e276 -r 29ceb7126b91 src/lib-http/http-url.c
--- a/src/lib-http/http-url.c	Sun Sep 15 03:46:25 2013 +0300
+++ b/src/lib-http/http-url.c	Sun Sep 15 03:47:29 2013 +0300
@@ -5,7 +5,9 @@
 #include "strfuncs.h"
 #include "net.h"
 #include "uri-util.h"
+
 #include "http-url.h"
+#include "http-request.h"
 
 /*
  * HTTP URL parser
@@ -19,57 +21,20 @@
 	struct http_url *url;
 	struct http_url *base;
 
-	unsigned int relative:1;
+	enum http_request_target_format req_format;
+
+ 	unsigned int relative:1;
+	unsigned int request_target:1;
 };
 
-static bool http_url_do_parse(struct http_url_parser *url_parser)
+static bool http_url_parse_authority(struct http_url_parser *url_parser)
 {
 	struct uri_parser *parser = &url_parser->parser;
-	struct http_url *url = url_parser->url, *base = url_parser->base;
+	struct http_url *url = url_parser->url;
 	struct uri_authority auth;
-	const char *const *path;
-	bool relative = TRUE, have_path = FALSE;
-	int path_relative;
-	const char *part;
 	int ret;
 
-	/* RFC 2616 - Hypertext Transfer Protocol, Section 3.2:
-	 *   
-	 * http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
-	 * 
-	 * Translated to RFC 3986:
-	 *
-	 * absolute-http-URL = "http:" "//" host [ ":" port ] path-absolute
-	 *                       ["?" query] [ "#" fragment ]
-	 * relative-http-ref = relative-http-part [ "?" query ] [ "#" fragment ]
-	 * relative-http-part = "//" host [ ":" port ] path-abempty
-	 *                      / path-absolute
-	 *                      / path-noscheme
-	 *                      / path-empty
-	 */
-
-	/* "http:" / "https:" */
-	if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
-		const char *scheme;
-
-		if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
-			return FALSE;
-		else if (ret > 0) {
-			if (strcasecmp(scheme, "https") == 0) {
-				if (url != NULL)
-					url->have_ssl = TRUE;
-			} else if (strcasecmp(scheme, "http") != 0) {
-				parser->error = "Not an HTTP URL";
-				return FALSE;
-			}
-			relative = FALSE;
-		}
-	} else {
-		relative = FALSE;
-	}
-
-	/* "//" host [ ":" port ] */
-	if ((ret = uri_parse_slashslash_authority(parser, &auth)) < 0)
+	if ((ret = uri_parse_authority(parser, &auth)) < 0)
 		return FALSE;
 	if (ret > 0) {
 		if (auth.enc_userinfo != NULL) {
@@ -87,19 +52,153 @@
 			parser->error = "HTTP URL does not allow `userinfo@' part";
 			return FALSE;
 		}
-		relative = FALSE;
-	} else if (!relative) {
-		parser->error = "Absolute HTTP URL requires `//' after `http:'";
-		return FALSE;
 	}
-
-	if (ret > 0 && url != NULL) {
+	if (url != NULL) {
 		url->host_name = p_strdup(parser->pool, auth.host_literal);
 		url->host_ip = auth.host_ip;
 		url->have_host_ip = auth.have_host_ip;
 		url->port = auth.port;
 		url->have_port = auth.have_port;
 	}
+	return TRUE;
+}
+
+static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
+{
+	struct uri_parser *parser = &url_parser->parser;
+
+	if (!http_url_parse_authority(url_parser))
+		return FALSE;
+	if (parser->cur != parser->end)
+		return FALSE;
+	url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
+	return TRUE;
+}
+
+static bool http_url_do_parse(struct http_url_parser *url_parser)
+{
+	struct uri_parser *parser = &url_parser->parser;
+	struct http_url *url = url_parser->url, *base = url_parser->base;
+	const char *const *path;
+	bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
+		have_path = FALSE;
+	int path_relative;
+	const char *part;
+	int ret;
+
+	/*
+	   http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+	     Appendix C:
+
+	   http-URI       = "http://" authority path-abempty [ "?" query ]
+	                      [ "#" fragment ]
+	   https-URI      = "https://" authority path-abempty [ "?" query ]
+	                      [ "#" fragment ]
+	   partial-URI    = relative-part [ "?" query ]
+
+	   request-target = origin-form / absolute-form / authority-form /
+	                    asterisk-form
+
+	   origin-form    = absolute-path [ "?" query ]
+	   absolute-form  = absolute-URI
+	   authority-form = authority
+	   asterisk-form  = "*"
+	                    ; Not parsed here
+
+	   absolute-path  = 1*( "/" segment )
+
+	   http://tools.ietf.org/html/rfc3986
+	     Appendix A: (implemented in uri-util.h)
+
+	   absolute-URI   = scheme ":" hier-part [ "?" query ]
+
+	   hier-part      = "//" authority path-abempty
+	                  / path-absolute
+	                  / path-rootless
+	                  / path-empty
+
+	   relative-part  = "//" authority path-abempty
+	                  / path-absolute
+	                  / path-noscheme
+	                  / path-empty
+
+	   authority     = [ userinfo "@" ] host [ ":" port ]
+
+	   path-abempty   = *( "/" segment )
+	   path-absolute  = "/" [ segment-nz *( "/" segment ) ]
+	   path-noscheme  = segment-nz-nc *( "/" segment )
+	   path-rootless  = segment-nz *( "/" segment )
+	   path-empty     = 0<pchar>
+
+	   segment        = *pchar
+	   segment-nz     = 1*pchar
+	   segment-nz-nc  = 1*( unreserved / pct-encoded / sub-delims / "@" )
+                    ; non-zero-length segment without any colon ":"
+
+	   query          = *( pchar / "/" / "?" )
+	   fragment       = *( pchar / "/" / "?" )
+	 */
+
+	/* "http:" / "https:" */
+	if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) {
+		const char *scheme;
+
+		if ((ret = uri_parse_scheme(parser, &scheme)) < 0)
+			return FALSE;
+		else if (ret > 0) {
+			if (strcasecmp(scheme, "https") == 0) {
+				if (url != NULL)
+					url->have_ssl = TRUE;
+			} else if (strcasecmp(scheme, "http") != 0) {
+				if (url_parser->request_target) {
+					/* valid as non-HTTP scheme, but also try to parse as authority */
+					parser->cur = parser->begin;
+					if (!http_url_parse_authority_form(url_parser)) {
+						url_parser->url = NULL; /* indicate non-http-url */
+						url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
+					}
+					return TRUE;
+				}
+				parser->error = "Not an HTTP URL";
+				return FALSE;
+			}
+			relative = FALSE;
+			have_scheme = TRUE;
+		}
+	} else {
+		relative = FALSE;
+		have_scheme = TRUE;
+	}
+
+	/* "//" authority   ; or
+	 * ["//"] authority ; when parsing a request target
+	 */
+	if (parser->cur < parser->end && parser->cur[0] == '/') {
+		if (parser->cur+1 < parser->end && parser->cur[1] == '/') {
+			parser->cur += 2;
+			relative = FALSE;
+			have_authority = TRUE;
+		} else {
+			/* start of absolute-path */
+		}
+	} else if (url_parser->request_target && !have_scheme) {
+		if (!http_url_parse_authority_form(url_parser)) {
+			/* not non-HTTP scheme and invalid as authority-form */
+			parser->error = "Request target is invalid";
+			return FALSE;
+		}
+		return TRUE;
+	}
+
+	if (have_scheme && !have_authority) {
+		parser->error = "Absolute HTTP URL requires `//' after `http:'";
+ 		return FALSE;
+	}
+
+	if (have_authority) {
+		if (!http_url_parse_authority(url_parser))
+			return FALSE;
+	}
 
 	/* path-abempty / path-absolute / path-noscheme / path-empty */
 	if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
@@ -108,14 +207,15 @@
 	/* Relative URLs are only valid when we have a base URL */
 	if (relative) {
 		if (base == NULL) {
-			parser->error = "Relative URL not allowed";
+			parser->error = "Relative HTTP URL not allowed";
 			return FALSE;
-		} else if (url != NULL) {
-			url->host_name = p_strdup_empty(parser->pool, base->host_name); 
+		} else if (!have_authority && url != NULL) {
+			url->host_name = p_strdup(parser->pool, base->host_name); 
 			url->host_ip = base->host_ip;
 			url->have_host_ip = base->have_host_ip;
 			url->port = base->port;
 			url->have_port = base->have_port;
+			url->have_ssl = base->have_ssl;
 		}
 
 		url_parser->relative = TRUE;
@@ -152,7 +252,7 @@
 			if (url != NULL && pend > pbegin)
 				str_append_n(fullpath, pbegin, pend-pbegin);
 		}
-	
+
 		/* append relative path */
 		while (*path != NULL) {
 			if (!uri_data_decode(parser, *path, NULL, &part))
@@ -161,7 +261,7 @@
 			if (url != NULL) {
 				str_append_c(fullpath, '/');
 				str_append(fullpath, part);
-			}	
+			}
 			path++;
 		}
 
@@ -170,7 +270,7 @@


More information about the dovecot-cvs mailing list