dovecot-2.2: lib-compression: Added initial support for LZ4

dovecot at dovecot.org dovecot at dovecot.org
Wed Jan 15 00:58:15 EET 2014


details:   http://hg.dovecot.org/dovecot-2.2/rev/fb4a0a84da50
changeset: 17104:fb4a0a84da50
user:      Timo Sirainen <tss at iki.fi>
date:      Wed Jan 15 00:57:59 2014 +0200
description:
lib-compression: Added initial support for LZ4
There's no standard file format for LZ4, so we created our own. The code has
had only minimal testing currently, so there may be bugs.

diffstat:

 configure.ac                       |   26 +++
 src/lib-compression/Makefile.am    |    2 +
 src/lib-compression/compression.c  |   18 ++
 src/lib-compression/iostream-lz4.h |   30 +++
 src/lib-compression/istream-lz4.c  |  318 +++++++++++++++++++++++++++++++++++++
 src/lib-compression/istream-zlib.h |    1 +
 src/lib-compression/ostream-lz4.c  |  188 +++++++++++++++++++++
 src/lib-compression/ostream-zlib.h |    1 +
 8 files changed, 584 insertions(+), 0 deletions(-)

diffs (truncated from 674 to 300 lines):

diff -r 3f3c9f93a0b3 -r fb4a0a84da50 configure.ac
--- a/configure.ac	Wed Jan 15 00:28:35 2014 +0200
+++ b/configure.ac	Wed Jan 15 00:57:59 2014 +0200
@@ -184,6 +184,11 @@
   TEST_WITH(lzma, $withval),
   want_lzma=auto)
 
+AC_ARG_WITH(lz4,
+AS_HELP_STRING([--with-lz4], [Build with LZ4 compression support]),
+  TEST_WITH(lz4, $withval),
+  want_lz4=auto)
+
 AC_ARG_WITH(libcap,
 AS_HELP_STRING([--with-libcap], [Build with libcap support (Dropping capabilities).]),
   TEST_WITH(libcap, $withval),
@@ -2691,6 +2696,27 @@
   ])
 fi
 AC_SUBST(COMPRESS_LIBS)
+
+if test "$want_lz4" != "no"; then # probe for "auto" and "yes"; only "yes" turns a missing header or library into an error
+  AC_CHECK_HEADER(lz4.h, [
+    AC_CHECK_LIB(lz4, LZ4_compress, [
+      have_lz4=yes
+      have_compress_lib=yes
+      AC_DEFINE(HAVE_LZ4,, Define if you have lz4 library)
+      COMPRESS_LIBS="$COMPRESS_LIBS -llz4"
+    ], [
+      if test "$want_lz4" = "yes"; then
+	AC_ERROR([Can't build with lz4 support: liblz4 not found])
+      fi
+    ])
+  ], [
+    if test "$want_lz4" = "yes"; then
+      AC_ERROR([Can't build with lz4 support: lz4.h not found])
+    fi
+  ])
+fi
+AC_SUBST(COMPRESS_LIBS) # NOTE(review): COMPRESS_LIBS is already substituted right before this block; this repeat looks redundant
+
 AM_CONDITIONAL(BUILD_ZLIB_PLUGIN, test "$have_compress_lib" = "yes")
 
 RPCGEN=${RPCGEN-rpcgen}
diff -r 3f3c9f93a0b3 -r fb4a0a84da50 src/lib-compression/Makefile.am
--- a/src/lib-compression/Makefile.am	Wed Jan 15 00:28:35 2014 +0200
+++ b/src/lib-compression/Makefile.am	Wed Jan 15 00:57:59 2014 +0200
@@ -7,9 +7,11 @@
 libcompression_la_SOURCES = \
 	compression.c \
 	istream-lzma.c \
+	istream-lz4.c \
 	istream-zlib.c \
 	istream-bzlib.c \
 	ostream-lzma.c \
+	ostream-lz4.c \
 	ostream-zlib.c \
 	ostream-bzlib.c
 libcompression_la_LIBADD = \
diff -r 3f3c9f93a0b3 -r fb4a0a84da50 src/lib-compression/compression.c
--- a/src/lib-compression/compression.c	Wed Jan 15 00:28:35 2014 +0200
+++ b/src/lib-compression/compression.c	Wed Jan 15 00:57:59 2014 +0200
@@ -4,6 +4,7 @@
 #include "istream.h"
 #include "istream-zlib.h"
 #include "ostream-zlib.h"
+#include "iostream-lz4.h"
 #include "compression.h"
 
 #ifndef HAVE_ZLIB
@@ -20,6 +21,10 @@
 #  define i_stream_create_lzma NULL
 #  define o_stream_create_lzma NULL
 #endif
+#ifndef HAVE_LZ4
+#  define i_stream_create_lz4 NULL
+#  define o_stream_create_lz4 NULL
+#endif
 
 static bool is_compressed_zlib(struct istream *input)
 {
@@ -63,6 +68,17 @@
 	return memcmp(data, "\xfd\x37\x7a\x58\x5a", 6) == 0;
 }
 
+static bool is_compressed_lz4(struct istream *input)
+{
+	const unsigned char *data;
+	size_t size;
+
+	/* no standard LZ4 header exists, so look for Dovecot's own magic */
+	if (i_stream_read_data(input, &data, &size, IOSTREAM_LZ4_MAGIC_LEN-1) <= 0)
+		return FALSE; /* full magic not buffered; memcmp() needs it all */
+	return memcmp(data, IOSTREAM_LZ4_MAGIC, IOSTREAM_LZ4_MAGIC_LEN) == 0;
+}
+
 const struct compression_handler *compression_lookup_handler(const char *name)
 {
 	unsigned int i;
@@ -113,5 +129,7 @@
 	  i_stream_create_deflate, o_stream_create_deflate },
 	{ "xz", ".xz", is_compressed_xz,
 	  i_stream_create_lzma, o_stream_create_lzma },
+	{ "lz4", ".lz4", is_compressed_lz4,
+	  i_stream_create_lz4, o_stream_create_lz4 },
 	{ NULL, NULL, NULL, NULL, NULL }
 };
diff -r 3f3c9f93a0b3 -r fb4a0a84da50 src/lib-compression/iostream-lz4.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-compression/iostream-lz4.h	Wed Jan 15 00:57:59 2014 +0200
@@ -0,0 +1,30 @@
+#ifndef IOSTREAM_LZ4_H
+#define IOSTREAM_LZ4_H
+
+/*
+   Dovecot's LZ4 compressed files contain:
+
+   struct iostream_lz4_header
+   n x (4 byte big-endian: compressed chunk length, compressed chunk)
+*/
+
+#define IOSTREAM_LZ4_MAGIC "Dovecot-LZ4\x0d\x2a\x9b\xc5"
+#define IOSTREAM_LZ4_MAGIC_LEN (sizeof(IOSTREAM_LZ4_MAGIC)-1)
+
+struct iostream_lz4_header {
+	unsigned char magic[IOSTREAM_LZ4_MAGIC_LEN]; /* IOSTREAM_LZ4_MAGIC without its trailing NUL */
+	/* OSTREAM_LZ4_CHUNK_SIZE in big-endian (most significant byte first) */
+	unsigned char max_uncompressed_chunk_size[4];
+};
+
+/* How large chunks we're buffering into memory before compressing them */
+#define OSTREAM_LZ4_CHUNK_SIZE (1024*64)
+/* How large chunks we allow in input data before returning a failure.
+   This must be at least OSTREAM_LZ4_CHUNK_SIZE, but for future compatibility
+   should be somewhat higher (but not too high to avoid wasting memory for
+   corrupted files). */
+#define ISTREAM_LZ4_CHUNK_SIZE (1024*1024)
+
+#define IOSTREAM_LZ4_CHUNK_PREFIX_LEN 4 /* big-endian size of chunk */
+
+#endif
diff -r 3f3c9f93a0b3 -r fb4a0a84da50 src/lib-compression/istream-lz4.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-compression/istream-lz4.c	Wed Jan 15 00:57:59 2014 +0200
@@ -0,0 +1,318 @@
+/* Copyright (c) 2013-2014 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+
+#ifdef HAVE_LZ4
+
+#include "buffer.h"
+#include "istream-private.h"
+#include "istream-zlib.h"
+#include "iostream-lz4.h"
+#include <lz4.h>
+
+struct lz4_istream {
+	struct istream_private istream; /* the stream callbacks cast their stream pointer to struct lz4_istream */
+
+	uoff_t stream_size; /* set to v_offset + buffered bytes when a parent read error ends the stream */
+	struct stat last_parent_statbuf;
+
+	buffer_t *chunk_buf; /* compressed bytes of the chunk currently being collected */
+	uint32_t chunk_size, chunk_left, max_uncompressed_chunk_size; /* chunk length from its prefix, bytes of it still unread, limit from the file header */
+
+	unsigned int log_errors:1; /* read errors are also passed to i_error() */
+	unsigned int marked:1;
+	unsigned int header_read:1; /* file header has been validated and skipped */
+};
+
+/* Close callback of the LZ4 input stream: releases the buffer that holds the
+   compressed chunk and, when close_parent is set, closes the parent stream
+   as well. */
+static void i_stream_lz4_close(struct iostream_private *stream,
+			       bool close_parent)
+{
+	struct lz4_istream *zstream = (struct lz4_istream *)stream;
+
+	/* NOTE(review): guard against this callback running again for a
+	   stream whose buffer was already released - confirm whether
+	   buffer_free() tolerates a NULL buffer on its own */
+	if (zstream->chunk_buf != NULL)
+		buffer_free(&zstream->chunk_buf);
+	if (close_parent)
+		i_stream_close(zstream->istream.parent);
+}
+
+static void lz4_read_error(struct lz4_istream *zstream, const char *error)
+{
+	io_stream_set_error(&zstream->istream.iostream,
+			    "lz4.read(%s): %s at %"PRIuUOFF_T,
+			    i_stream_get_name(&zstream->istream.istream), error,
+			    zstream->istream.abs_start_offset +
+			    zstream->istream.istream.v_offset);
+	if (zstream->log_errors)
+		i_error("%s", zstream->istream.iostream.error);
+}
+
+/* Reads and validates the Dovecot LZ4 file header from the parent stream.
+   Returns 1 once the header has been checked and skipped over, 0 if the
+   parent returned too little data and this stream isn't at EOF, and a
+   negative value on failure (stream_errno is then either copied from the
+   parent or set to EINVAL for a bad header). */
+static int i_stream_lz4_read_header(struct lz4_istream *zstream)
+{
+	struct istream_private *stream = &zstream->istream;
+	const struct iostream_lz4_header *hdr = NULL;
+	const unsigned char *data, *be;
+	size_t size;
+	bool bad_magic;
+	int ret;
+
+	/* threshold is one less than the header size, i.e. we want all of it */
+	ret = i_stream_read_data(stream->parent, &data, &size, sizeof(*hdr)-1);
+	if (ret < 0) {
+		stream->istream.stream_errno = stream->parent->stream_errno;
+		return ret;
+	}
+
+	if (ret == 0) {
+		if (!stream->istream.eof)
+			return 0;
+		/* the header never arrived in full - don't look at the data */
+		bad_magic = TRUE;
+	} else {
+		hdr = (const void *)data;
+		bad_magic = memcmp(hdr->magic, IOSTREAM_LZ4_MAGIC,
+				   IOSTREAM_LZ4_MAGIC_LEN) != 0;
+	}
+	if (bad_magic) {
+		lz4_read_error(zstream, "wrong magic in header (not lz4 file?)");
+		stream->istream.stream_errno = EINVAL;
+		return -1;
+	}
+
+	/* the size field is stored as 4 bytes in big-endian order */
+	be = hdr->max_uncompressed_chunk_size;
+	zstream->max_uncompressed_chunk_size =
+		((uint32_t)be[0] << 24) | (be[1] << 16) | (be[2] << 8) | be[3];
+	if (zstream->max_uncompressed_chunk_size > ISTREAM_LZ4_CHUNK_SIZE) {
+		lz4_read_error(zstream, t_strdup_printf(
+			"lz4 max chunk size too large (%u > %u)",
+			zstream->max_uncompressed_chunk_size,
+			ISTREAM_LZ4_CHUNK_SIZE));
+		stream->istream.stream_errno = EINVAL;
+		return -1;
+	}
+
+	i_stream_skip(stream->parent, sizeof(*hdr));
+	return 1;
+}
+
+static ssize_t i_stream_lz4_read(struct istream_private *stream)
+{
+	struct lz4_istream *zstream = (struct lz4_istream *)stream;
+	const unsigned char *data;
+	size_t size, max_size;
+	int ret;
+
+	if (!zstream->header_read) {
+		if ((ret = i_stream_lz4_read_header(zstream)) <= 0)
+			return ret;
+		zstream->header_read = TRUE;
+	}
+
+	if (zstream->chunk_left == 0) {
+		ret = i_stream_read_data(stream->parent, &data, &size,
+					 IOSTREAM_LZ4_CHUNK_PREFIX_LEN);
+		if (ret < 0) {
+			stream->istream.stream_errno =
+				stream->parent->stream_errno;
+			if (stream->istream.stream_errno != 0) {
+				stream->istream.eof = TRUE;
+				zstream->stream_size = stream->istream.v_offset +
+					stream->pos - stream->skip;
+			}
+			return ret;
+		}
+		if (ret == 0 && !stream->istream.eof)
+			return 0;
+		zstream->chunk_size = zstream->chunk_left =
+			((uint32_t)data[0] << 24) |
+			(data[1] << 16) | (data[2] << 8) | data[3];
+		if (zstream->chunk_size == 0 ||
+		    zstream->chunk_size > ISTREAM_LZ4_CHUNK_SIZE) {
+			lz4_read_error(zstream, t_strdup_printf(
+				"invalid lz4 chunk size: %u", zstream->chunk_size));
+			stream->istream.stream_errno = EINVAL;
+			return -1;
+		}
+		i_stream_skip(stream->parent, IOSTREAM_LZ4_CHUNK_PREFIX_LEN);
+		buffer_set_used_size(zstream->chunk_buf, 0);
+	}
+
+	/* read the whole compressed chunk into memory */
+	while (zstream->chunk_left > 0 &&
+	       (ret = i_stream_read_data(zstream->istream.parent,
+					 &data, &size, 0)) > 0) {
+		if (size > zstream->chunk_left)
+			size = zstream->chunk_left;
+		buffer_append(zstream->chunk_buf, data, size);
+		i_stream_skip(zstream->istream.parent, size);
+		zstream->chunk_left -= size;
+	}
+	if (zstream->chunk_left > 0) {
+		if (ret == -1 && zstream->istream.parent->stream_errno == 0) {
+			lz4_read_error(zstream, "truncated lz4 chunk");
+			stream->istream.stream_errno = EINVAL;
+			return -1;
+		}
+		zstream->istream.istream.stream_errno =
+			zstream->istream.parent->stream_errno;
+		return ret;
+	}
+	/* if we already have max_buffer_size amount of data, fail here */
+	i_stream_compress(stream);
+	if (stream->pos >= stream->max_buffer_size)
+		return -2;
+	/* allocate enough space for the old data and the new
+	   decompressed chunk. we don't know the original uncompressed size,
+	   so just allocate the max amount of memory. */


More information about the dovecot-cvs mailing list