[Dovecot] Compressed maildir

Andrey Panin pazke at donpac.ru
Wed Jun 15 11:12:06 EEST 2005


On Wed, Jun 15, 2005 at 04:54:21PM +1000, Curtis Maloney wrote:
> 
> Greetings,
> 
> 	I noticed a while back someone posted a patch/plugin that allowed 
> 	Dovecot to use compressed mbox files.  I'm now wondering how far that would 
> put us from having compressed maildir?  

Been here, done that :) You can try the attached patch; it's a little ugly, but
it has been working on a production server for more than 2 months.

> I have a server with more CPU than 
> disk space, and while I can buy more HDD space, my backup solution doesn't 
> make that practical.
> 
> 	It seems to me that when looking for a message file, if it ends in 
> 	.gz unpack it, and otherwise everything acts as normal.  Worst case, this 
> is one strcat() and a stat() slower to find.
>
> Newly delivered messages could remain unpacked, and a cron job could come 
> by whenever to compress old/large/un-looked-at-for-months messages.  So, 
> new and frequently referenced messages would be as fast as ever, and older 
> messages would be slower.
> 
> 	I would love to dive in and do this myself, but 1- my time is very 
> 	very limited, working two jobs, and 2- I'm not running 1.0 yet, as it 
> apparently still doesn't support my Thunderbird users tagging their 
> messages (am I wrong?  please tell me I'm wrong... I want to upgrade! :)

-- 
Andrey Panin		| Linux and UNIX system administrator
pazke at donpac.ru		| PGP key: wwwkeys.pgp.net
-------------- next part --------------
diff -urdpNX /usr/share/dontdiff -x debian dovecot-1.0-test73.vanilla/configure.in dovecot-1.0-test73/configure.in
--- dovecot-1.0-test73.vanilla/configure.in	2005-06-15 01:43:30.000000000 +0400
+++ dovecot-1.0-test73/configure.in	2005-06-15 11:57:29.000000000 +0400
@@ -266,6 +266,8 @@ dnl **
 dnl ** just some generic stuff...
 dnl **
 
+LIBS="$LIBS -lz"
+
 AC_CHECK_FUNC(socket, [], [
 	AC_CHECK_LIB(socket, socket, [
 		LIBS="$LIBS -lsocket"
diff -urdpNX /usr/share/dontdiff -x debian dovecot-1.0-test73.vanilla/src/lib/istream-zlib.c dovecot-1.0-test73/src/lib/istream-zlib.c
--- dovecot-1.0-test73.vanilla/src/lib/istream-zlib.c	1970-01-01 03:00:00.000000000 +0300
+++ dovecot-1.0-test73/src/lib/istream-zlib.c	2005-06-15 11:54:54.000000000 +0400
@@ -0,0 +1,190 @@
+/* Copyright (C) 2005 Timo Sirainen */
+
+#include "lib.h"
+#include "istream-internal.h"
+#include "istream-zlib.h"
+
+#include <zlib.h>
+
+#define I_STREAM_MIN_SIZE 4096
+
+struct zlib_istream {
+	struct _istream istream; /* kept first: callbacks cast struct _istream * to this type */
+
+	size_t max_buffer_size;	/* 0 = no limit on istream.buffer_size */
+
+	int fd;
+	gzFile file;	/* gzFile is already the handle type gzdopen() returns */
+	buffer_t *buffer;
+	uoff_t cached_size;	/* size reported by _stat(); (uoff_t)-1 = not measured yet */
+};
+
+static void _close(struct _iostream *stream)
+{
+	struct zlib_istream *zs = (struct zlib_istream *)stream;
+
+	if (zs->file == NULL)
+		return; /* no gz handle left to release */
+	gzclose(zs->file);
+	zs->file = NULL;
+}
+
+static void _destroy(struct _iostream *stream __attr_unused__)
+{	/* no-op. NOTE(review): w_buffer from i_stream_grow_buffer() is not freed here - confirm the pool reclaims it */
+}
+
+static void _set_max_buffer_size(struct _iostream *stream, size_t max_size)
+{
+	/* only records the cap; it is consulted when the buffer is grown */
+	struct zlib_istream *zs = (struct zlib_istream *)stream;
+	zs->max_buffer_size = max_size;
+}
+
+static void i_stream_grow_buffer(struct _istream *stream, size_t bytes)
+{
+	struct zlib_istream *zs = (struct zlib_istream *)stream;
+	const size_t prev_size = stream->buffer_size;
+	size_t wanted = stream->pos + bytes;
+
+	/* never allocate less than the minimum; above it, let
+	   pool_get_exp_grown_size() choose the new size */
+	if (wanted > I_STREAM_MIN_SIZE) {
+		wanted = pool_get_exp_grown_size(stream->iostream.pool,
+						 prev_size, wanted);
+	} else {
+		wanted = I_STREAM_MIN_SIZE;
+	}
+
+	/* a non-zero max_buffer_size caps the result */
+	if (zs->max_buffer_size > 0 && wanted > zs->max_buffer_size)
+		wanted = zs->max_buffer_size;
+
+	stream->buffer_size = wanted;
+	stream->w_buffer = p_realloc(stream->iostream.pool, stream->w_buffer,
+				     prev_size, wanted);
+	stream->buffer = stream->w_buffer;
+}
+
+static void i_stream_compress(struct _istream *stream)
+{
+	/* slide the unread bytes [skip, pos) down to the start of the buffer */
+	const size_t unread = stream->pos - stream->skip;
+	memmove(stream->w_buffer, stream->w_buffer + stream->skip, unread);
+	stream->pos = unread;
+	stream->skip = 0;
+}
+
+/* Read more decompressed data into the buffer. Returns bytes added (> 0),
+   0 for a transient EINTR/EAGAIN, -1 at EOF/error, -2 when the buffer is full. */
+static ssize_t _read(struct _istream *stream)
+{
+	struct zlib_istream *zstream = (struct zlib_istream *)stream;
+	size_t size;
+	int ret, saved_errno, zerr = Z_OK;
+
+	if (stream->istream.closed)
+		return -1;
+
+	stream->istream.stream_errno = 0;
+
+	if (stream->pos == stream->buffer_size) {
+		if (stream->skip > 0) {
+			/* remove the unused bytes from beginning of buffer */
+			i_stream_compress(stream);
+		} else if (zstream->max_buffer_size == 0 ||
+			   stream->buffer_size < zstream->max_buffer_size) {
+			/* buffer is full - grow it */
+			i_stream_grow_buffer(stream, I_STREAM_MIN_SIZE);
+		}
+
+		if (stream->pos == stream->buffer_size)
+			return -2; /* buffer full */
+	}
+
+	size = stream->buffer_size - stream->pos;
+	ret = gzread(zstream->file, stream->w_buffer + stream->pos, size);
+	saved_errno = errno;
+	if (ret > 0) {
+		stream->pos += ret;
+		return ret;
+	}
+	if (ret == 0) {
+		/* EOF */
+		stream->istream.eof = TRUE;
+		return -1;
+	}
+
+	/* errno is meaningful only when zlib itself reports Z_ERRNO */
+	(void)gzerror(zstream->file, &zerr);
+	if (zerr == Z_ERRNO && (saved_errno == EINTR || saved_errno == EAGAIN)) {
+		gzclearerr(zstream->file); /* let the next call retry */
+		return 0;
+	}
+	stream->istream.eof = TRUE;
+	stream->istream.stream_errno = zerr == Z_ERRNO ? saved_errno : EIO;
+	return -1;
+}
+
+/* Seek to decompressed offset v_offset; buffered data is always discarded. */
+static void _seek(struct _istream *stream, uoff_t v_offset)
+{
+	struct zlib_istream *zstream = (struct zlib_istream *) stream;
+	stream->skip = stream->pos = 0;
+	/* gzseek() returns -1 on failure; report it instead of ignoring it */
+	stream->istream.stream_errno =
+		gzseek(zstream->file, v_offset, SEEK_SET) < 0 ? EIO : 0;
+	stream->istream.v_offset = v_offset;
+}
+
+/* fstat() the fd, but report the (cached) decompressed length as st_size. */
+static const struct stat *_stat(struct _istream *stream)
+{
+	struct zlib_istream *zs = (struct zlib_istream *) stream;
+	uoff_t saved_offset;
+	size_t avail;
+
+	if (fstat(zs->fd, &stream->statbuf) < 0) {
+		i_error("zlib_istream.fstat() failed: %m");
+		return NULL;
+	}
+	if (zs->cached_size == (uoff_t)-1) {
+		saved_offset = stream->istream.v_offset;
+		do { /* skip what is buffered, then pull in more until _read() stops */
+			(void)i_stream_get_data(&stream->istream, &avail);
+			i_stream_skip(&stream->istream, avail);
+		} while (_read(stream) > 0);
+		zs->cached_size = stream->istream.v_offset;
+		i_stream_seek(&stream->istream, saved_offset);
+	}
+	stream->statbuf.st_size = zs->cached_size;
+	return &stream->statbuf;
+}
+
+static void _sync(struct _istream *stream)
+{
+	/* forget the measured size so the next _stat() recomputes it */
+	struct zlib_istream *zs = (struct zlib_istream *) stream;
+	zs->cached_size = (uoff_t)-1;
+}
+
+/* Create an input stream that reads the file open on fd through zlib's gz API. */
+struct istream *i_stream_create_zlib(int fd, pool_t pool)
+{
+	struct zlib_istream *zstream;
+
+	zstream = p_new(pool, struct zlib_istream, 1);
+	zstream->fd = fd;
+	zstream->cached_size = (uoff_t)-1;
+	zstream->file = gzdopen(fd, "r");
+	if (zstream->file == NULL) /* gzread() will fail later; make the cause visible */
+		i_error("zlib_istream: gzdopen() failed");
+	zstream->istream.iostream.close = _close;
+	zstream->istream.iostream.destroy = _destroy;
+	zstream->istream.iostream.set_max_buffer_size = _set_max_buffer_size;
+	zstream->istream.read = _read;
+	zstream->istream.seek = _seek;
+	zstream->istream.stat = _stat;
+	zstream->istream.sync = _sync;
+	zstream->istream.istream.seekable = TRUE;
+	return _i_stream_create(&zstream->istream, pool, fd, 0);
+}
diff -urdpNX /usr/share/dontdiff -x debian dovecot-1.0-test73.vanilla/src/lib/istream-zlib.h dovecot-1.0-test73/src/lib/istream-zlib.h
--- dovecot-1.0-test73.vanilla/src/lib/istream-zlib.h	1970-01-01 03:00:00.000000000 +0300
+++ dovecot-1.0-test73/src/lib/istream-zlib.h	2005-06-15 11:54:54.000000000 +0400
@@ -0,0 +1,6 @@
+#ifndef __ISTREAM_ZLIB_H
+#define __ISTREAM_ZLIB_H
+/* Create an istream that reads the file open on fd through zlib (gzdopen/gzread). */
+struct istream *i_stream_create_zlib(int fd, pool_t pool);
+
+#endif
diff -urdpNX /usr/share/dontdiff -x debian dovecot-1.0-test73.vanilla/src/lib/Makefile.am dovecot-1.0-test73/src/lib/Makefile.am
--- dovecot-1.0-test73.vanilla/src/lib/Makefile.am	2005-03-29 19:44:42.000000000 +0400
+++ dovecot-1.0-test73/src/lib/Makefile.am	2005-06-15 11:54:54.000000000 +0400
@@ -26,6 +26,7 @@ liblib_a_SOURCES = \
 	istream-limit.c \
 	istream-mmap.c \
 	istream-seekable.c \
+	istream-zlib.c \
 	ioloop.c \
 	ioloop-notify-none.c \
 	ioloop-notify-dn.c \
@@ -96,6 +97,7 @@ noinst_HEADERS = \
 	istream.h \
 	istream-internal.h \
 	istream-seekable.h \
+	istream-zlib.h \
 	ioloop.h \
 	ioloop-internal.h \
 	lib.h \
diff -urdpNX /usr/share/dontdiff -x debian dovecot-1.0-test73.vanilla/src/lib-storage/index/maildir/maildir-mail.c dovecot-1.0-test73/src/lib-storage/index/maildir/maildir-mail.c
--- dovecot-1.0-test73.vanilla/src/lib-storage/index/maildir/maildir-mail.c	2005-06-09 18:40:31.000000000 +0400
+++ dovecot-1.0-test73/src/lib-storage/index/maildir/maildir-mail.c	2005-06-15 12:02:13.000000000 +0400
@@ -5,6 +5,7 @@
 #include "index-mail.h"
 #include "maildir-storage.h"
 #include "maildir-uidlist.h"
+#include "istream-zlib.h"
 
 #include <fcntl.h>
 #include <unistd.h>
@@ -57,6 +58,17 @@ maildir_open_mail(struct maildir_mailbox
 		return NULL;
 	}
 
+	{
+		static const unsigned char zlib_magic[3] = { 0x1F, 0x8B, 8 };
+		unsigned char magic[3] = { };
+  
+		read(fd, magic, sizeof(magic));
+		lseek(fd, 0, SEEK_SET);
+  
+		if (memcmp(magic, zlib_magic, sizeof(magic)) == 0)
+  			return i_stream_create_zlib(fd, default_pool);
+	}
+
 	if (mbox->ibox.mail_read_mmaped) {
 		return i_stream_create_mmap(fd, default_pool,
 					    MAIL_MMAP_BLOCK_SIZE, 0, 0, TRUE);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://dovecot.org/pipermail/dovecot/attachments/20050615/857422ad/attachment-0001.bin>


More information about the dovecot mailing list