dovecot-2.2: JSON parser now supports nested objects and parsing...

dovecot at dovecot.org dovecot at dovecot.org
Thu Nov 29 06:26:44 EET 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/86572582647e
changeset: 15435:86572582647e
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Nov 29 06:26:29 2012 +0200
description:
JSON parser now supports nested objects and parsing from a non-blocking input stream.

diffstat:

 src/auth/db-dict.c         |   14 +-
 src/lib/Makefile.am        |    1 +
 src/lib/json-parser.c      |  295 ++++++++++++++++++++++++++++++++------------
 src/lib/json-parser.h      |   19 +-
 src/lib/test-json-parser.c |   98 ++++++++++++++
 src/lib/test-lib.c         |    1 +
 src/lib/test-lib.h         |    1 +
 7 files changed, 335 insertions(+), 94 deletions(-)

diffs (truncated from 688 to 300 lines):

diff -r 309c9fe1f0a9 -r 86572582647e src/auth/db-dict.c
--- a/src/auth/db-dict.c	Thu Nov 29 02:30:15 2012 +0200
+++ b/src/auth/db-dict.c	Thu Nov 29 06:26:29 2012 +0200
@@ -5,6 +5,7 @@
 #include "settings.h"
 #include "dict.h"
 #include "json-parser.h"
+#include "istream.h"
 #include "str.h"
 #include "auth-request.h"
 #include "auth-worker-client.h"
@@ -122,6 +123,7 @@
 db_dict_value_iter_init(struct dict_connection *conn, const char *value)
 {
 	struct db_dict_value_iter *iter;
+	struct istream *input;
 
 	i_assert(strcmp(conn->set.value_format, "json") == 0);
 
@@ -129,7 +131,9 @@
 	   value types are supported. */
 	iter = i_new(struct db_dict_value_iter, 1);
 	iter->key = str_new(default_pool, 64);
-	iter->parser = json_parser_init((const void *)value, strlen(value));
+	input = i_stream_create_from_data(value, strlen(value));
+	iter->parser = json_parser_init(input);
+	i_stream_unref(&input);
 	return iter;
 }
 
@@ -139,7 +143,7 @@
 	enum json_type type;
 	const char *value;
 
-	if (!json_parse_next(iter->parser, &type, &value))
+	if (json_parse_next(iter->parser, &type, &value) < 0)
 		return FALSE;
 	if (type != JSON_TYPE_OBJECT_KEY) {
 		iter->error = "Object expected";
@@ -152,10 +156,14 @@
 	str_truncate(iter->key, 0);
 	str_append(iter->key, value);
 
-	if (!json_parse_next(iter->parser, &type, &value)) {
+	if (json_parse_next(iter->parser, &type, &value) < 0) {
 		iter->error = "Missing value";
 		return FALSE;
 	}
+	if (type == JSON_TYPE_OBJECT) {
+		iter->error = "Nested objects not supported";
+		return FALSE;
+	}
 	*key_r = str_c(iter->key);
 	*value_r = value;
 	return TRUE;
diff -r 309c9fe1f0a9 -r 86572582647e src/lib/Makefile.am
--- a/src/lib/Makefile.am	Thu Nov 29 02:30:15 2012 +0200
+++ b/src/lib/Makefile.am	Thu Nov 29 06:26:29 2012 +0200
@@ -275,6 +275,7 @@
 	test-istream-crlf.c \
 	test-istream-seekable.c \
 	test-istream-tee.c \
+	test-json-parser.c \
 	test-llist.c \
 	test-mempool-alloconly.c \
 	test-network.c \
diff -r 309c9fe1f0a9 -r 86572582647e src/lib/json-parser.c
--- a/src/lib/json-parser.c	Thu Nov 29 02:30:15 2012 +0200
+++ b/src/lib/json-parser.c	Thu Nov 29 06:26:29 2012 +0200
@@ -2,12 +2,14 @@
 
 #include "lib.h"
 #include "str.h"
+#include "istream.h"
 #include "hex-dec.h"
 #include "unichar.h"
 #include "json-parser.h"
 
 enum json_state {
 	JSON_STATE_ROOT = 0,
+	JSON_STATE_OBJECT_OPEN,
 	JSON_STATE_OBJECT_KEY,
 	JSON_STATE_OBJECT_COLON,
 	JSON_STATE_OBJECT_VALUE,
@@ -16,22 +18,81 @@
 };
 
 struct json_parser {
-	const unsigned char *data, *end;
+	struct istream *input;
+	uoff_t highwater_offset;
+
+	const unsigned char *start, *end, *data;
 	const char *error;
 	string_t *value;
 
 	enum json_state state;
+	unsigned int nested_object_count;
 };
 
-struct json_parser *
-json_parser_init(const unsigned char *data, unsigned int len)
+static int
+json_try_parse_next(struct json_parser *parser, enum json_type *type_r,
+		    const char **value_r);
+
+static int json_parser_read_more(struct json_parser *parser)
+{
+	uoff_t cur_highwater = parser->input->v_offset +
+		i_stream_get_data_size(parser->input);
+	size_t size;
+	ssize_t ret;
+
+	i_assert(parser->highwater_offset <= cur_highwater);
+
+	if (parser->error != NULL)
+		return -1;
+
+	if (parser->highwater_offset == cur_highwater) {
+		ret = i_stream_read(parser->input);
+		if (ret == -2) {
+			parser->error = "Token too large";
+			return -1;
+		}
+		if (ret <= 0)
+			return ret;
+
+		cur_highwater = parser->input->v_offset +
+			i_stream_get_data_size(parser->input);
+		i_assert(parser->highwater_offset < cur_highwater);
+		parser->highwater_offset = cur_highwater;
+	}
+
+	parser->start = parser->data = i_stream_get_data(parser->input, &size);
+	parser->end = parser->start + size;
+	i_assert(size > 0);
+	return 1;
+}
+
+static void json_parser_update_input_pos(struct json_parser *parser)
+{
+	size_t size;
+
+	if (parser->data == parser->start)
+		return;
+
+	i_stream_skip(parser->input, parser->data - parser->start);
+	parser->start = parser->data = i_stream_get_data(parser->input, &size);
+	parser->end = parser->start + size;
+	if (size > 0) {
+		/* we skipped over some data and there's still data left.
+		   no need to read() the next time. */
+		parser->highwater_offset = 0;
+	} else {
+		parser->highwater_offset = parser->input->v_offset;
+	}
+}
+
+struct json_parser *json_parser_init(struct istream *input)
 {
 	struct json_parser *parser;
 
 	parser = i_new(struct json_parser, 1);
-	parser->data = data;
-	parser->end = data + len;
+	parser->input = input;
 	parser->value = str_new(default_pool, 128);
+	i_stream_ref(input);
 	return parser;
 }
 
@@ -41,12 +102,22 @@
 
 	*_parser = NULL;
 
-	if (parser->error == NULL && parser->data == parser->end &&
-	    parser->state != JSON_STATE_ROOT &&
-	    parser->state != JSON_STATE_DONE)
-		parser->error = "Missing '}'";
-
-	*error_r = parser->error;
+	if (parser->error != NULL) {
+		/* actual parser error */
+		*error_r = parser->error;
+	} else if (parser->input->stream_errno != 0) {
+		*error_r = t_strdup_printf("read(%s) failed: %m",
+					   i_stream_get_name(parser->input));
+	} else if (parser->data == parser->end &&
+		   !i_stream_have_bytes_left(parser->input) &&
+		   parser->state != JSON_STATE_ROOT &&
+		   parser->state != JSON_STATE_DONE) {
+		*error_r = "Missing '}'";
+	} else {
+		*error_r = NULL;
+	}
+	
+	i_stream_unref(&parser->input);
 	str_free(&parser->value);
 	i_free(parser);
 	return *error_r != NULL ? -1 : 0;
@@ -62,34 +133,35 @@
 		case '\n':
 			break;
 		default:
+			json_parser_update_input_pos(parser);
 			return TRUE;
 		}
 	}
+	json_parser_update_input_pos(parser);
 	return FALSE;
 }
 
 static int json_parse_string(struct json_parser *parser, const char **value_r)
 {
-	const unsigned char *p;
-
 	if (*parser->data != '"')
 		return -1;
+	parser->data++;
 
 	str_truncate(parser->value, 0);
-	for (p = parser->data + 1; p < parser->end; p++) {
-		if (*p == '"') {
-			parser->data = p + 1;
+	for (; parser->data !=  parser->end; parser->data++) {
+		if (*parser->data == '"') {
+			parser->data++;
 			*value_r = str_c(parser->value);
 			return 0;
 		}
-		if (*p != '\\')
-			str_append_c(parser->value, *p);
+		if (*parser->data != '\\')
+			str_append_c(parser->value, *parser->data);
 		else {
-			switch (*++p) {
+			switch (*++parser->data) {
 			case '"':
 			case '\\':
 			case '/':
-				str_append_c(parser->value, *p);
+				str_append_c(parser->value, *parser->data);
 				break;
 			case 'b':
 				str_append_c(parser->value, '\b');
@@ -107,11 +179,11 @@
 				str_append_c(parser->value, '\t');
 				break;
 			case 'u':
-				if (parser->end - p < 4)
+				if (parser->end - parser->data < 4)
 					return -1;
-				uni_ucs4_to_utf8_c(hex2dec(p, 4),
+				uni_ucs4_to_utf8_c(hex2dec(parser->data, 4),
 						   parser->value);
-				p += 3;
+				parser->data += 3;
 				break;
 			default:
 				return -1;
@@ -122,99 +194,131 @@
 }
 
 static int
-json_parse_digits(struct json_parser *parser, const unsigned char **_p)
+json_parse_digits(struct json_parser *parser)
 {
-	const unsigned char *p = *_p;
-
-	if (p >= parser->end || *p < '0' || *p > '9')
+	if (parser->data == parser->end ||
+	    *parser->data < '0' || *parser->data > '9')
 		return -1;
 
-	for (; p < parser->end && *p >= '0' && *p <= '9'; p++)
-		str_append_c(parser->value, *p++);
-	*_p = p;
+	while (parser->data != parser->end &&
+	       *parser->data >= '0' && *parser->data <= '9')
+		str_append_c(parser->value, *parser->data++);
 	return 0;
 }
 
-static int json_parse_int(struct json_parser *parser, const unsigned char **_p)
+static int json_parse_int(struct json_parser *parser)
 {
-	const unsigned char *p = *_p;
-
-	if (*p == '-') {
-		str_append_c(parser->value, *p++);
-		if (p == parser->end)
+	if (*parser->data == '-') {
+		str_append_c(parser->value, *parser->data++);
+		if (parser->data == parser->end)
 			return -1;
 	}
-	if (*p == '0')
-		str_append_c(parser->value, *p++);
+	if (*parser->data == '0')
+		str_append_c(parser->value, *parser->data++);


More information about the dovecot-cvs mailing list