dovecot-2.2: liblib: Added generic URI parsing functions.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Jun 2 19:02:27 EEST 2012
details: http://hg.dovecot.org/dovecot-2.2/rev/ba36e4380cf4
changeset: 14587:ba36e4380cf4
user: Stephan Bosch <stephan at rename-it.nl>
date: Sat Jun 02 17:06:21 2012 +0300
description:
liblib: Added generic URI parsing functions.
diffstat:
src/lib/Makefile.am | 2 +
src/lib/uri-util.c | 723 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/lib/uri-util.h | 49 +++
3 files changed, 774 insertions(+), 0 deletions(-)
diffs (truncated from 799 to 300 lines):
diff -r 21d67121985a -r ba36e4380cf4 src/lib/Makefile.am
--- a/src/lib/Makefile.am Sat Jun 02 16:55:21 2012 +0300
+++ b/src/lib/Makefile.am Sat Jun 02 17:06:21 2012 +0300
@@ -121,6 +121,7 @@
unlink-directory.c \
unlink-old-files.c \
unichar.c \
+ uri-util.c \
utc-offset.c \
utc-mktime.c \
var-expand.c \
@@ -228,6 +229,7 @@
unlink-directory.h \
unlink-old-files.h \
unichar.h \
+ uri-util.h \
utc-offset.h \
utc-mktime.h \
var-expand.h \
diff -r 21d67121985a -r ba36e4380cf4 src/lib/uri-util.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/uri-util.c Sat Jun 02 17:06:21 2012 +0300
@@ -0,0 +1,723 @@
+/* Copyright (c) 2010-2012 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "network.h"
+#include "uri-util.h"
+
+#include <ctype.h>
+
+/*
+ * Generic URI parsing.
+ *
+ * [URI-GEN] RFC3986 Appendix A:
+ *
+ * host = IP-literal / IPv4address / reg-name
+ * port = *DIGIT
+ * reg-name = *( unreserved / pct-encoded / sub-delims )
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * pct-encoded = "%" HEXDIG HEXDIG
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "="
+ * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+ * IPv6address = 6( h16 ":" ) ls32
+ * / "::" 5( h16 ":" ) ls32
+ * / [ h16 ] "::" 4( h16 ":" ) ls32
+ * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ * / [ *4( h16 ":" ) h16 ] "::" ls32
+ * / [ *5( h16 ":" ) h16 ] "::" h16
+ * / [ *6( h16 ":" ) h16 ] "::"
+ * h16 = 1*4HEXDIG
+ * ls32 = ( h16 ":" h16 ) / IPv4address
+ * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * dec-octet = DIGIT ; 0-9
+ * / %x31-39 DIGIT ; 10-99
+ * / "1" 2DIGIT ; 100-199
+ * / "2" %x30-34 DIGIT ; 200-249
+ * / "25" %x30-35 ; 250-255
+ */
+
+#define URI_MAX_SCHEME_NAME_LEN 64
+
+/* Character lookup table
+ *
+ * Each table entry is a bitmask: a character belongs to a class below when
+ * its entry has at least one of the bits listed in brackets set.
+ *
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "=" [bit1]
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
+ * pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
+ * 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
+ * 'fchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5]
+ *
+ */
+
+static unsigned const char _uri_char_lookup[256] = { // character class bitmask, indexed by byte value
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0f: control characters, no class
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1f: control characters, no class
+ 0, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 36, // 0x20-0x2f: space and punctuation; 36 = bit2|bit5 for the slash
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 2, 0, 2, 0, 36, // 0x30-0x3f: digits 0-9, then : ; < = > ? (28 = bit2|bit3|bit4 for the colon)
+ 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4f: @ (12 = bit2|bit3), then A-O
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 4, 0, 1, // 0x50-0x5f: P-Z, then [ backslash ] ^ _
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6f: backtick, then a-o
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 0x70-0x7f: p-z, then open brace, bar, close brace, tilde, DEL
+}; // 0x80-0xff have no initializer and are therefore zero (no class)
+
+/* Convert a single ASCII hexadecimal digit to its numeric value.
+ *
+ * Both letter cases are accepted. Returns 0..15 for a valid digit and -1
+ * for any other character.
+ */
+static inline int _decode_hex_digit(const unsigned char digit)
+{
+	if (digit >= '0' && digit <= '9')
+		return digit - '0';
+	if (digit >= 'a' && digit <= 'f')
+		return digit - 'a' + 0x0a;
+	if (digit >= 'A' && digit <= 'F')
+		return digit - 'A' + 0x0A;
+
+	/* not a hex digit */
+	return -1;
+}
+
+/* Decode the two hex digits of a percent-encoded octet
+ * (pct-encoded = "%" HEXDIG HEXDIG).
+ *
+ * On entry *p must point just past the '%'. pend is the exclusive end of
+ * the input, or NULL when the input is only bounded by its terminating NUL.
+ *
+ * Returns 1 on success: the decoded octet is stored in *ch_r and *p is
+ * advanced past both digits. Returns -1 on failure with parser->error set;
+ * in that case *p may have advanced past the first digit and *ch_r may hold
+ * a partial value. An encoded NUL character (%00) is rejected.
+ */
+static int
+uri_parse_pct_encoded(struct uri_parser *parser, const unsigned char **p,
+		      const unsigned char *pend, unsigned char *ch_r)
+{
+	int value;
+
+	/* Test the explicit boundary first: the data is only inspected once
+	   both digits are known to lie before pend, so nothing at or beyond
+	   pend is ever read. */
+	if ((pend != NULL && *p + 1 >= pend) ||
+	    **p == 0 || *(*p + 1) == 0) {
+		parser->error = "Unexpected URI boundary after '%'";
+		return -1;
+	}
+
+	/* high nibble */
+	if ((value = _decode_hex_digit(**p)) < 0) {
+		parser->error = t_strdup_printf(
+			"Expecting hex digit after '%%', but found '%c'", **p);
+		return -1;
+	}
+
+	*ch_r = (value & 0x0f) << 4;
+	*p += 1;
+
+	/* low nibble */
+	if ((value = _decode_hex_digit(**p)) < 0) {
+		parser->error = t_strdup_printf(
+			"Expecting hex digit after '%%%c', but found '%c'",
+			*((*p)-1), **p);
+		return -1;
+	}
+
+	*ch_r |= (value & 0x0f);
+	*p += 1;
+
+	if (*ch_r == '\0') {
+		parser->error =
+			"Percent encoding is not allowed to encode NUL character";
+		return -1;
+	}
+	return 1;
+}
+
+/* Parse one character at parser->cur: either a percent-encoded octet or a
+ * literal 'unreserved' character (bit0 in the lookup table).
+ *
+ * Returns 1 with the (decoded) character stored in *ch_r and parser->cur
+ * advanced, 0 when the current character is neither (parser->cur is left
+ * untouched), or -1 when a percent encoding is invalid. parser->cur is
+ * dereferenced without a bounds check here; the caller visible in this file
+ * checks parser->cur < parser->end before calling.
+ */
+static int
+uri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
+{
+	const unsigned char c = *parser->cur;
+
+	if (c == '%') {
+		/* skip the '%' and decode the two hex digits behind it */
+		parser->cur++;
+		return uri_parse_pct_encoded(parser, &parser->cur,
+					     parser->end, ch_r) <= 0 ? -1 : 1;
+	}
+
+	/* 8-bit characters and anything not flagged as unreserved end the
+	   match without consuming input */
+	if ((c & 0x80) != 0 || (_uri_char_lookup[c] & 0x01) == 0)
+		return 0;
+
+	*ch_r = c;
+	parser->cur++;
+	return 1;
+}
+
+/* Consume a run of unreserved and/or percent-encoded characters starting at
+ * parser->cur, stopping at parser->end or at the first character that does
+ * not qualify.
+ *
+ * When part is not NULL, every (decoded) character is appended to it.
+ * Returns 1 if at least one character was consumed, 0 if none was, and -1
+ * when an invalid percent encoding is encountered.
+ */
+int uri_parse_unreserved(struct uri_parser *parser, string_t *part)
+{
+	int count;
+
+	for (count = 0; parser->cur < parser->end; count++) {
+		unsigned char ch = 0;
+		int ret = uri_parse_unreserved_char(parser, &ch);
+
+		if (ret < 0)
+			return -1;
+		if (ret == 0)
+			break;
+
+		if (part != NULL)
+			str_append_c(part, ch);
+	}
+
+	return count > 0 ? 1 : 0;
+}
+
+/* Decode all percent-encoded octets in data.
+ *
+ * Decoding stops at until, or at the terminating NUL when until is NULL
+ * (a NUL before until also ends the data). Returns TRUE on success; when
+ * decoded_r is not NULL it then receives the decoded string, which is "" for
+ * empty input and a t_strdup() copy otherwise. Returns FALSE when a percent
+ * encoding is invalid, in which case uri_parse_pct_encoded() has set
+ * parser->error and *decoded_r is left untouched.
+ */
+bool uri_data_decode(struct uri_parser *parser, const char *data,
+		     const char *until, const char **decoded_r)
+{
+	const unsigned char *p = (const unsigned char *)data;
+	/* NULL means unlimited; solely rely on '\0' */
+	const unsigned char *pend = (const unsigned char *)until;
+	string_t *decoded;
+
+	if ((pend != NULL && p >= pend) || *p == '\0') {
+		if (decoded_r != NULL)
+			*decoded_r = "";
+		return TRUE;
+	}
+
+	decoded = uri_parser_get_tmpbuf(parser, 256);
+	while ((pend == NULL || p < pend) && *p != '\0') {
+		unsigned char ch;
+
+		if (*p == '%') {
+			p++;
+			/* Hand the real boundary on, so that an escape which
+			   straddles 'until' is rejected rather than decoded
+			   from bytes beyond the requested range. */
+			if (uri_parse_pct_encoded(parser, &p, pend, &ch) <= 0)
+				return FALSE;
+
+			str_append_c(decoded, ch);
+		} else {
+			str_append_c(decoded, *p);
+			p++;
+		}
+	}
+
+	if (decoded_r != NULL)
+		*decoded_r = t_strdup(str_c(decoded));
+	return TRUE;
+}
+
+/* Split the scheme off the front of the NUL-terminated URI at *uri_p.
+ *
+ * RFC 3986:
+ *   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * At most URI_MAX_SCHEME_NAME_LEN scheme characters are accepted. On success
+ * the scheme name (without the ':') is returned as a copy made by
+ * t_strdup_until() and *uri_p is moved just past the ':'. Returns NULL and
+ * leaves *uri_p unchanged when the input does not start with a valid scheme
+ * followed by ':'.
+ */
+const char *uri_cut_scheme(const char **uri_p)
+{
+	const char *start = *uri_p;
+	size_t n;
+
+	/* the first character must be a letter */
+	if (!i_isalpha(start[0]))
+		return NULL;
+
+	/* find the end of the run of scheme characters */
+	for (n = 1; n < URI_MAX_SCHEME_NAME_LEN; n++) {
+		const char c = start[n];
+
+		if (c == '\0')
+			break;
+		if (!i_isalnum(c) && c != '+' && c != '-' && c != '.')
+			break;
+	}
+
+	if (start[n] != ':')
+		return NULL;
+
+	*uri_p = start + n + 1;
+	return t_strdup_until(start, start + n);
+}
+
+/* Parse the URI scheme at the parser's current position.
+ *
+ * Returns 1 with the scheme name in *scheme_r and parser->cur moved past the
+ * ':' that terminates it. Returns 0 when the input is exhausted (*scheme_r
+ * is not written) or when uri_cut_scheme() finds no scheme (*scheme_r is
+ * set to NULL); parser->cur is unchanged in both cases.
+ */
+int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
+{
+	const char *rest, *scheme;
+
+	if (parser->cur >= parser->end)
+		return 0;
+
+	rest = (const char *)parser->cur;
+	scheme = uri_cut_scheme(&rest);
+	*scheme_r = scheme;
+	if (scheme == NULL)
+		return 0;
+
+	parser->cur = (const unsigned char *)rest;
+	return 1;
+}
+
+static int
+uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
+ uint8_t *octet_r)
+{
+ uint8_t octet = 0;
+ int count = 0;
+
+ /* RFC 3986:
+ *
+ * dec-octet = DIGIT ; 0-9
+ * / %x31-39 DIGIT ; 10-99
+ * / "1" 2DIGIT ; 100-199
+ * / "2" %x30-34 DIGIT ; 200-249
+ * / "25" %x30-35 ; 250-255
+ */
+
+ while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
+ uint8_t prev = octet;
+
+ octet = octet * 10 + (uint8_t)(parser->cur[0] - '0');
+ if (octet < prev)
+ return -1;
+
+ if (literal != NULL)
+ str_append_c(literal, *parser->cur);
+
+ parser->cur++;
+ count++;
More information about the dovecot-cvs
mailing list