dovecot-2.2: JSON parser: Added support for reading string value as istream.

dovecot at dovecot.org dovecot at dovecot.org
Thu Nov 29 07:52:58 EET 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/014be18f7130
changeset: 15437:014be18f7130
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Nov 29 07:52:51 2012 +0200
description:
JSON parser: Added support for reading string value as istream.

diffstat:

 src/lib/Makefile.am        |    2 +
 src/lib/istream-jsonstr.c  |  179 +++++++++++++++++++++++++++++++++++++++++++++
 src/lib/istream-jsonstr.h  |    7 +
 src/lib/json-parser.c      |  103 +++++++++++++++++++++++++
 src/lib/json-parser.h      |    6 +
 src/lib/test-json-parser.c |   49 ++++++++++-
 6 files changed, 340 insertions(+), 6 deletions(-)

diffs (truncated from 475 to 300 lines):

diff -r 9a31c44c1184 -r 014be18f7130 src/lib/Makefile.am
--- a/src/lib/Makefile.am	Thu Nov 29 07:30:23 2012 +0200
+++ b/src/lib/Makefile.am	Thu Nov 29 07:52:51 2012 +0200
@@ -62,6 +62,7 @@
 	istream-crlf.c \
 	istream-data.c \
 	istream-file.c \
+	istream-jsonstr.c \
 	istream-limit.c \
 	istream-mmap.c \
 	istream-rawlog.c \
@@ -187,6 +188,7 @@
 	istream-chain.h \
 	istream-concat.h \
 	istream-crlf.h \
+	istream-jsonstr.h \
 	istream-private.h \
 	istream-rawlog.h \
 	istream-seekable.h \
diff -r 9a31c44c1184 -r 014be18f7130 src/lib/istream-jsonstr.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/istream-jsonstr.c	Thu Nov 29 07:52:51 2012 +0200
@@ -0,0 +1,179 @@
+/* Copyright (c) 2012 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "hex-dec.h"
+#include "unichar.h"
+#include "istream-private.h"
+#include "istream-jsonstr.h"
+
+#define MAX_UTF8_LEN 6
+
+/* State for an input stream that reads the contents of a JSON string from
+   a parent stream and unescapes it. */
+struct jsonstr_istream {
+	/* Must stay the first member: i_stream_jsonstr_read() casts its
+	   struct istream_private pointer back to struct jsonstr_istream. */
+	struct istream_private istream;
+
+	/* The end '"' was found */
+	unsigned int str_end:1;
+};
+
+/* Make sure the parent stream has at least min_bytes of data available and
+   that this stream's own buffer can be allocated.
+
+   Returns 1 on success and -2 if i_stream_try_alloc() fails. If reading the
+   parent fails, the parent's stream_errno and eof are copied to this stream
+   and the failing i_stream_read() result (<= 0) is returned. */
+static int
+i_stream_jsonstr_read_parent(struct jsonstr_istream *jstream,
+			     unsigned int min_bytes)
+{
+	struct istream_private *stream = &jstream->istream;
+	struct istream *parent = stream->parent;
+	size_t parent_size, avail;
+	ssize_t ret;
+
+	for (;;) {
+		parent_size = i_stream_get_data_size(parent);
+		if (parent_size >= min_bytes)
+			break;
+
+		ret = i_stream_read(parent);
+		if (ret > 0)
+			continue;
+		if (ret == -2 && stream->skip != 0) {
+			/* the parent returned -2, but part of our own
+			   buffer has already been skipped: retry */
+			continue;
+		}
+
+		/* propagate the parent's failure state to this stream */
+		stream->istream.stream_errno = parent->stream_errno;
+		stream->istream.eof = parent->eof;
+		return ret;
+	}
+
+	if (!i_stream_try_alloc(stream, parent_size, &avail))
+		return -2;
+	return 1;
+}
+
+/* Decode a single JSON escape sequence. src points at the character that
+   follows the backslash and the decoded bytes are written to dest.
+
+   On success returns 0, storing the number of source bytes consumed (the
+   backslash is not counted) in *src_size_r and the number of bytes written
+   in *dest_size_r. Returns -1 for an unknown escape character without
+   touching dest or the size outputs.
+
+   For \u this reads the four bytes following 'u' and may write up to
+   MAX_UTF8_LEN bytes to dest, so the caller has to make sure both are
+   available. */
+static int
+i_stream_json_unescape(const unsigned char *src, unsigned char *dest,
+		       unsigned int *src_size_r, unsigned int *dest_size_r)
+{
+	unsigned char ch;
+
+	if (*src == 'u') {
+		/* \uXXXX: encode the code point as UTF-8 directly into dest */
+		buffer_t utf8;
+
+		buffer_create_from_data(&utf8, dest, MAX_UTF8_LEN);
+		uni_ucs4_to_utf8_c(hex2dec(src+1, 4), &utf8);
+		*src_size_r = 5;
+		*dest_size_r = utf8.used;
+		return 0;
+	}
+
+	/* all remaining escapes map one input character to one output byte */
+	switch (*src) {
+	case '"':
+	case '\\':
+	case '/':
+		ch = *src;
+		break;
+	case 'b':
+		ch = '\b';
+		break;
+	case 'f':
+		ch = '\f';
+		break;
+	case 'n':
+		ch = '\n';
+		break;
+	case 'r':
+		ch = '\r';
+		break;
+	case 't':
+		ch = '\t';
+		break;
+	default:
+		return -1;
+	}
+
+	*dest = ch;
+	*src_size_r = 1;
+	*dest_size_r = 1;
+	return 0;
+}
+
+/* Read callback of the jsonstr stream: copies data from the parent stream
+   into this stream's buffer, unescaping backslash sequences, until the
+   closing '"' is seen or input/output space runs out.
+
+   Returns the number of bytes added to the buffer (> 0). Returns -1 with
+   eof set once the closing '"' has been reached and there is nothing more
+   to return, -1 with stream_errno = EINVAL for an unknown escape, -2 when
+   there is no room for a \u sequence and nothing was produced, or the
+   (<= 0) result of i_stream_jsonstr_read_parent(). */
+static ssize_t i_stream_jsonstr_read(struct istream_private *stream)
+{
+	struct jsonstr_istream *jstream = (struct jsonstr_istream *)stream;
+	const unsigned char *data;
+	unsigned int srcskip, destskip, extra;
+	size_t i, dest, size;
+	ssize_t ret;
+
+	if (jstream->str_end) {
+		/* the closing quote was already found by an earlier call */
+		stream->istream.eof = TRUE;
+		return -1;
+	}
+
+	ret = i_stream_jsonstr_read_parent(jstream, 1);
+	if (ret <= 0)
+		return ret;
+
+	/* @UNSAFE */
+	/* dest is the write offset into w_buffer; extra is the number of
+	   additional input bytes an incomplete escape sequence still needs */
+	dest = stream->pos;
+	extra = 0;
+
+	data = i_stream_get_data(stream->parent, &size);
+	for (i = 0; i < size && dest < stream->buffer_size; ) {
+		if (data[i] == '"') {
+			/* end of string. i is left pointing at the quote, so
+			   the quote is not skipped in the parent. */
+			jstream->str_end = TRUE;
+			if (dest == stream->pos) {
+				/* nothing was produced by this call */
+				stream->istream.eof = TRUE;
+				return -1;
+			}
+			break;
+		} else if (data[i] == '\\') {
+			if (i+1 == size) {
+				/* not enough input for \x */
+				extra = 1;
+				break;
+			}
+			/* the four hex digits are at data[i+2..i+5] */
+			if ((data[i+1] == 'u' && i+1+4 >= size)) {
+				/* not enough input for \u0000 */
+				extra = 5;
+				break;
+			}
+			if (data[i+1] == 'u' && stream->buffer_size - dest < MAX_UTF8_LEN) {
+				/* UTF8 output is max. 6 chars */
+				if (dest == stream->pos)
+					return -2;
+				/* return what has been produced so far; the
+				   escape stays unconsumed in the parent */
+				break;
+			}
+			/* step over the backslash */
+			i++;
+			if (i_stream_json_unescape(data + i,
+						   stream->w_buffer + dest,
+						   &srcskip, &destskip) < 0) {
+				/* invalid string */
+				stream->istream.stream_errno = EINVAL;
+				return -1;
+			}
+			i += srcskip;
+			i_assert(i <= size);
+			dest += destskip;
+			i_assert(dest <= stream->buffer_size);
+		} else {
+			/* ordinary character: copy as is */
+			stream->w_buffer[dest++] = data[i];
+			i++;
+		}
+	}
+	/* consume the input bytes that were processed above */
+	i_stream_skip(stream->parent, i);
+
+	ret = dest - stream->pos;
+	if (ret == 0) {
+		/* not enough input */
+		i_assert(extra > 0);
+		/* wait until the whole escape sequence is available in the
+		   parent and then start over */
+		ret = i_stream_jsonstr_read_parent(jstream, i+extra+1);
+		if (ret <= 0)
+			return ret;
+		return i_stream_jsonstr_read(stream);
+	}
+	i_assert(ret > 0);
+	stream->pos = dest;
+	return ret;
+}
+
+/* Create a stream that returns the unescaped contents of a JSON string read
+   from input. The new stream is not seekable, has no readable fd, and
+   takes its blocking flag and max buffer size from input. */
+struct istream *i_stream_create_jsonstr(struct istream *input)
+{
+	struct jsonstr_istream *jstream = i_new(struct jsonstr_istream, 1);
+	struct istream_private *priv = &jstream->istream;
+
+	priv->read = i_stream_jsonstr_read;
+	priv->max_buffer_size = input->real_stream->max_buffer_size;
+
+	priv->istream.seekable = FALSE;
+	priv->istream.readable_fd = FALSE;
+	priv->istream.blocking = input->blocking;
+
+	return i_stream_create(priv, input, i_stream_get_fd(input));
+}
diff -r 9a31c44c1184 -r 014be18f7130 src/lib/istream-jsonstr.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/istream-jsonstr.h	Thu Nov 29 07:52:51 2012 +0200
@@ -0,0 +1,7 @@
+#ifndef ISTREAM_JSONSTR_H
+#define ISTREAM_JSONSTR_H
+
+/* Create an input stream that reads the contents of a JSON string from
+   input and unescapes JSON backslash sequences (\", \\, \/, \b, \f, \n,
+   \r, \t and \uXXXX). The stream never looks for an opening '"', so input
+   is expected to be positioned just after it. The stream reaches EOF at
+   the closing '"', which is left unread in input. An unknown escape makes
+   reading fail with stream_errno = EINVAL. */
+struct istream *i_stream_create_jsonstr(struct istream *input);
+
+#endif
diff -r 9a31c44c1184 -r 014be18f7130 src/lib/json-parser.c
--- a/src/lib/json-parser.c	Thu Nov 29 07:30:23 2012 +0200
+++ b/src/lib/json-parser.c	Thu Nov 29 07:52:51 2012 +0200
@@ -5,6 +5,7 @@
 #include "istream.h"
 #include "hex-dec.h"
 #include "unichar.h"
+#include "istream-jsonstr.h"
 #include "json-parser.h"
 
 enum json_state {
@@ -14,6 +15,7 @@
 	JSON_STATE_OBJECT_COLON,
 	JSON_STATE_OBJECT_VALUE,
 	JSON_STATE_OBJECT_VALUE_NEXT,
+	JSON_STATE_STRINPUT_FINISH,
 	JSON_STATE_DONE
 };
 
@@ -24,6 +26,7 @@
 	const unsigned char *start, *end, *data;
 	const char *error;
 	string_t *value;
+	struct istream *strinput;
 
 	enum json_state state;
 	unsigned int nested_object_count;
@@ -197,6 +200,37 @@
 	return 0;
 }
 
+/* Skip over the rest of a JSON string in the parser's buffered data,
+   checking that every backslash is followed by a known escape character.
+
+   Returns 0 with parser->data pointing just past the closing '"'. Returns
+   -1 if the buffered data ends before the closing quote is found or if an
+   unknown escape character is seen. */
+static int json_skip_string(struct json_parser *parser)
+{
+	for (; parser->data != parser->end; parser->data++) {
+		if (*parser->data == '"') {
+			parser->data++;
+			return 0;
+		}
+		if (*parser->data == '\\') {
+			parser->data++;
+			if (parser->data == parser->end) {
+				/* The buffer ends right after the backslash.
+				   Stop here: reading the escape character
+				   would access memory beyond the buffer, and
+				   the loop increment would then step past
+				   parser->end so the loop never terminates. */
+				return -1;
+			}
+			switch (*parser->data) {
+			case '"':
+			case '\\':
+			case '/':
+			case 'b':
+			case 'f':
+			case 'n':
+			case 'r':
+			case 't':
+				break;
+			case 'u':
+				if (parser->end - parser->data < 4)
+					return -1;
+				/* Move to the third hex digit. The loop
+				   increment steps to the fourth one, which
+				   is then scanned as an ordinary character. */
+				parser->data += 3;
+				break;
+			default:
+				return -1;
+			}
+		}
+	}
+	return -1;
+}
+
 static int
 json_parse_digits(struct json_parser *parser)
 {
@@ -385,6 +419,11 @@
 		parser->data++;
 		json_parser_update_input_pos(parser);
 		return json_try_parse_next(parser, type_r, value_r);
+	case JSON_STATE_STRINPUT_FINISH:
+		if (json_skip_string(parser) < 0)
+			return -1;
+		parser->state = JSON_STATE_OBJECT_VALUE_NEXT;
+		return json_try_parse_next(parser, type_r, value_r);
 	case JSON_STATE_DONE:
 		parser->error = "Unexpected data at the end";
 		return -1;
@@ -398,6 +437,8 @@
 {
 	int ret;
 
+	i_assert(parser->strinput == NULL);
+
 	*value_r = NULL;
 
 	while ((ret = json_parser_read_more(parser)) > 0) {
@@ -412,3 +453,65 @@


More information about the dovecot-cvs mailing list