[dovecot-cvs] dovecot/src/plugins/fts-squat .cvsignore, NONE, 1.1 Makefile.am, NONE, 1.1 fts-backend-squat.c, NONE, 1.1 fts-squat-plugin.c, NONE, 1.1 fts-squat-plugin.h, NONE, 1.1 squat-test.c, NONE, 1.1 squat-trie.c, NONE, 1.1 squat-trie.h, NONE, 1.1 squat-uidlist.c, NONE, 1.1 squat-uidlist.h, NONE, 1.1

tss at dovecot.org tss at dovecot.org
Fri Dec 1 21:02:14 UTC 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts-squat
In directory talvi:/tmp/cvs-serv10351/src/plugins/fts-squat

Added Files:
	.cvsignore Makefile.am fts-backend-squat.c fts-squat-plugin.c 
	fts-squat-plugin.h squat-test.c squat-trie.c squat-trie.h 
	squat-uidlist.c squat-uidlist.h 
Log Message:
Added "squat" full text search indexer backend. Its name and basic ideas
originate to Cyrus's squat index, but Dovecot's implementation is completely
different and it supports incremental updates.

Still a bit broken and lacks locking, but I wanted to get this into CVS now.



--- NEW FILE: .cvsignore ---
*.la
*.lo
*.o
.deps
.libs
Makefile
Makefile.in
so_locations
squat-test

--- NEW FILE: Makefile.am ---
AM_CPPFLAGS = \
	-I$(top_srcdir)/src/lib \
	-I$(top_srcdir)/src/lib-mail \
	-I$(top_srcdir)/src/lib-storage \
	-I$(top_srcdir)/src/plugins/fts

lib02_fts_squat_plugin_la_LDFLAGS = -module -avoid-version

module_LTLIBRARIES = \
	lib02_fts_squat_plugin.la

lib02_fts_squat_plugin_la_SOURCES = \
	fts-squat-plugin.c \
	fts-backend-squat.c \
	squat-trie.c \
	squat-uidlist.c

noinst_HEADERS = \
	fts-squat-plugin.h \
	squat-trie.h \
	squat-uidlist.h

noinst_PROGRAMS = squat-test

squat_test_SOURCES = \
	squat-test.c

common_objects = \
	squat-trie.lo \
	squat-uidlist.lo

libs = \
	$(top_builddir)/src/lib-storage/register/libstorage-register.a \
	$(STORAGE_LIBS) \
	$(top_builddir)/src/lib-storage/libstorage.a \
	$(top_builddir)/src/lib-storage/list/libstorage_list.a \
	$(top_builddir)/src/lib-imap/libimap.a \
	$(top_builddir)/src/lib-mail/libmail.a \
	$(top_builddir)/src/lib-charset/libcharset.a \
	$(top_builddir)/src/lib/liblib.a

squat_test_LDADD = \
	$(common_objects) \
	$(libs) \
	$(LIBICONV) \
	$(RAND_LIBS)

squat_test_DEPENDENCIES = $(libs) $(common_objects)

install-exec-local:
	for d in imap lda; do \
	  $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
	  rm -f $(DESTDIR)$(moduledir)/$$d/lib02_fts_squat_plugin.so; \
	  $(LN_S) ../lib02_fts_squat_plugin.so $(DESTDIR)$(moduledir)/$$d; \
	done

--- NEW FILE: fts-backend-squat.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "array.h"
#include "mail-storage.h"
#include "mail-search.h"
#include "squat-trie.h"
#include "fts-squat-plugin.h"

#define SQUAT_FILE_PREFIX "dovecot.index.search"

struct squat_fts_backend {
	struct fts_backend backend;
	struct squat_trie *trie;

	uint32_t last_uid;
};

static struct fts_backend *fts_backend_squat_init(struct mailbox *box)
{
	struct squat_fts_backend *backend;
	struct mail_storage *storage;
	const char *path;

	storage = mailbox_get_storage(box);
	path = mail_storage_get_mailbox_index_dir(storage,
						  mailbox_get_name(box));
	if (*path == '\0') {
		/* in-memory indexes */
		return NULL;
	}

	backend = i_new(struct squat_fts_backend, 1);
	backend->backend = fts_backend_squat;
	backend->trie =
		squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL));
	return &backend->backend;
}

static void fts_backend_squat_deinit(struct fts_backend *_backend)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;

	squat_trie_close(backend->trie);
	i_free(backend);
}

static int fts_backend_squat_get_last_uid(struct fts_backend *_backend,
					  uint32_t *last_uid_r)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;

	*last_uid_r = backend->last_uid;
	return 0;
}

static struct fts_backend_build_context *
fts_backend_squat_build_init(struct fts_backend *_backend, uint32_t *last_uid_r)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;
	struct fts_backend_build_context *ctx;

	*last_uid_r = backend->last_uid;

	ctx = i_new(struct fts_backend_build_context, 1);
	ctx->backend = _backend;
	return ctx;
}

static int
fts_backend_squat_build_more(struct fts_backend_build_context *ctx,
			     uint32_t uid, const unsigned char *data,
			     size_t size)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)ctx->backend;

	i_assert(uid >= backend->last_uid);
	backend->last_uid = uid;

	return squat_trie_add(backend->trie, uid, data, size);
}

static int
fts_backend_squat_build_deinit(struct fts_backend_build_context *ctx)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)ctx->backend;

	squat_trie_flush(backend->trie);
	i_free(ctx);
	return 0;
}

static void
fts_backend_squat_expunge(struct fts_backend *_backend __attr_unused__,
			  struct mail *mail __attr_unused__)
{
}

static int get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids,
		    unsigned int *message_count_r)
{
	struct mail_search_arg search_arg;
        struct mailbox_transaction_context *t;
	struct mail_search_context *ctx;
	struct mail *mail;
	unsigned int count = 0;
	int ret;

	memset(&search_arg, 0, sizeof(search_arg));
	search_arg.type = SEARCH_ALL;

	t = mailbox_transaction_begin(box, 0);
	ctx = mailbox_search_init(t, NULL, &search_arg, NULL);

	mail = mail_alloc(t, 0, NULL);
	while (mailbox_search_next(ctx, mail) > 0) {
		seq_range_array_add(uids, 0, mail->uid);
		count++;
	}
	mail_free(&mail);

	ret = mailbox_search_deinit(&ctx);
	mailbox_transaction_rollback(&t);

	*message_count_r = count;
	return ret;
}

static void
fts_backend_squat_expunge_finish(struct fts_backend *_backend,
				 struct mailbox *box, bool committed)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;
	ARRAY_TYPE(seq_range) uids = ARRAY_INIT;
	unsigned int count;

	if (!committed)
		return;

	t_push();
	t_array_init(&uids, 128);
	if (get_uids(box, &uids, &count) == 0) {
		(void)squat_trie_mark_having_expunges(backend->trie, &uids,
						      count);
	}
	t_pop();
}

static int
fts_backend_squat_lookup(struct fts_backend *_backend, const char *key,
			 ARRAY_TYPE(seq_range) *result)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;

	return squat_trie_lookup(backend->trie, result, key);
}

static int
fts_backend_squat_filter(struct fts_backend *_backend, const char *key,
			 ARRAY_TYPE(seq_range) *result)
{
	struct squat_fts_backend *backend =
		(struct squat_fts_backend *)_backend;

	return squat_trie_filter(backend->trie, result, key);
}

struct fts_backend fts_backend_squat = {
	MEMBER(name) "squat",
	MEMBER(definite_lookups) FALSE,

	{
		fts_backend_squat_init,
		fts_backend_squat_deinit,
		fts_backend_squat_get_last_uid,
		fts_backend_squat_build_init,
		fts_backend_squat_build_more,
		fts_backend_squat_build_deinit,
		fts_backend_squat_expunge,
		fts_backend_squat_expunge_finish,
		fts_backend_squat_lookup,
		fts_backend_squat_filter
	}
};

--- NEW FILE: fts-squat-plugin.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "fts-squat-plugin.h"

void fts_squat_plugin_init(void)
{
	fts_backend_register(&fts_backend_squat);
}

void fts_squat_plugin_deinit(void)
{
	fts_backend_unregister(fts_backend_squat.name);
}

--- NEW FILE: fts-squat-plugin.h ---
#ifndef __FTS_SQUAT_PLUGIN_H
#define __FTS_SQUAT_PLUGIN_H

#include "fts-api-private.h"

extern struct fts_backend fts_backend_squat;

void fts_squat_plugin_init(void);
void fts_squat_plugin_deinit(void);

#endif

--- NEW FILE: squat-test.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "array.h"
#include "istream.h"
#include "squat-trie.h"
#include "squat-uidlist.h"

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <sys/time.h>

static void result_print(ARRAY_TYPE(seq_range) *result)
{
	const struct seq_range *range;
	unsigned int i, count;

	range = array_get(result, &count);
	for (i = 0; i < count; i++) {
		if (i != 0)
			printf(",");
		printf("%u", range[i].seq1);
		if (range[i].seq1 != range[i].seq2)
			printf("-%u", range[i].seq2);
	}
	printf("\n");
}

int main(int argc __attr_unused__, char *argv[])
{
	struct squat_trie *trie;
	struct istream *input;
	ARRAY_TYPE(seq_range) result;
	char *line, *str, buf[4096];
	int fd;
	ssize_t ret;
	unsigned int last = 0, seq = 0, leaves, uid_lists_mem, uid_lists_count;
	size_t mem;
	clock_t clock_start, clock_end;
	struct timeval tv_start, tv_end;
	double cputime;

	lib_init();
	(void)unlink("/tmp/squat-test-index.search");
	(void)unlink("/tmp/squat-test-index.search.uids");
	trie = squat_trie_open("/tmp/squat-test-index.search");

	clock_start = clock();
	gettimeofday(&tv_start, NULL);

	fd = open(argv[1], O_RDONLY);
	if (fd == -1)
		return 1;

	input = i_stream_create_file(fd, default_pool, 0, FALSE);
	while ((line = i_stream_read_next_line(input)) != NULL) {
		if (last != input->v_offset/(1024*100)) {
			fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024));
			fflush(stderr);
			last = input->v_offset/(1024*100);
		}
		if (strncmp(line, "From ", 5) == 0) {
			seq++;
			continue;
		}

		if (squat_trie_add(trie, seq, line, strlen(line)) < 0)
			break;
	}
	squat_trie_flush(trie);

	clock_end = clock();
	gettimeofday(&tv_end, NULL);

	cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
	fprintf(stderr, "\n - Index time: %.2f CPU seconds, "
		"%.2f real seconds (%.02fMB/CPUs)\n", cputime,
		(tv_end.tv_sec - tv_start.tv_sec) +
		(tv_end.tv_usec - tv_start.tv_usec)/1000000.0,
		input->v_offset / cputime / (1024*1024));

	mem = squat_trie_mem_used(trie, &leaves);
	uid_lists_mem = squat_uidlist_mem_used(_squat_trie_get_uidlist(trie),
					       &uid_lists_count);
	fprintf(stderr, " - %u bytes in %u nodes (%.02f%%)\n"
		" - %u bytes in %u uid_lists (%.02f%%)\n"
		" - %u bytes total of %"PRIuUOFF_T" (%.02f%%)\n",
		(unsigned)mem, leaves, mem / (float)input->v_offset * 100.0,
		uid_lists_mem, uid_lists_count,
		uid_lists_mem / (float)input->v_offset * 100.0,
		(unsigned)(uid_lists_mem + mem), input->v_offset,
		(uid_lists_mem + mem) / (float)input->v_offset * 100.0);

	i_stream_unref(&input);
	close(fd);

	i_array_init(&result, 128);
	while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) {
		ret = strlen(str)-1;
		str[ret] = 0;

		array_clear(&result);
		gettimeofday(&tv_start, NULL);
		if (!squat_trie_lookup(trie, &result, str))
			printf("No matches\n");
		else {
			gettimeofday(&tv_end, NULL);
			printf(" - Search took %.05f CPU seconds\n",
			       (tv_end.tv_sec - tv_start.tv_sec) +
			       (tv_end.tv_usec - tv_start.tv_usec)/1000000.0);
			result_print(&result);
		}
	}
	return 0;
}

--- NEW FILE: squat-trie.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "istream.h"
#include "ostream.h"
#include "mmap-util.h"
#include "squat-uidlist.h"
#include "squat-trie.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>

/* for non-x86 use memcpy() when accessing unaligned int* addresses */
#if defined(__i386__) || defined(__x86_64__)
[...1274 lines suppressed...]
	while (len >= BLOCK_SIZE) {
		list = trie_lookup_node(trie, trie->root,
					data + len - BLOCK_SIZE, 1);
		if (list == 0) {
			array_clear(result);
			return 0;
		}
		if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
			squat_trie_set_corrupted(trie, "uidlist offset broken");
			return -1;
		}
		len--;
	}
	return array_count(result) > 0 ? 1 : 0;
}

struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
{
	return trie->uidlist;
}

--- NEW FILE: squat-trie.h ---
#ifndef __SQUAT_TRIE_H
#define __SQUAT_TRIE_H

#include "seq-range-array.h"

struct squat_trie *squat_trie_open(const char *path);
void squat_trie_close(struct squat_trie *trie);

int squat_trie_add(struct squat_trie *trie, uint32_t uid,
		   const void *data, size_t size);
int squat_trie_flush(struct squat_trie *trie);
int squat_trie_compress(struct squat_trie *trie,
			const ARRAY_TYPE(seq_range) *existing_uids);

int squat_trie_mark_having_expunges(struct squat_trie *trie,
				    const ARRAY_TYPE(seq_range) *existing_uids,
				    unsigned int current_message_count);

int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
		      const char *str);
int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
		      const char *str);

size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r);

struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie);

void _squat_trie_pack_num(buffer_t *buffer, uint32_t num);
uint32_t _squat_trie_unpack_num(const uint8_t **p, const uint8_t *end);

void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason);

#endif

--- NEW FILE: squat-uidlist.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "array.h"
#include "ostream.h"
#include "mmap-util.h"
#include "write-full.h"
#include "squat-trie.h"
#include "squat-uidlist.h"

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#define UIDLIST_COMPRESS_PERCENTAGE 30
#define UIDLIST_UID_COMPRESS_PERCENTAGE 20
#define UIDLIST_COMPRESS_MIN_SIZE (1024*8)

#define UID_NODE_PREV_FLAG_OLD 0x00000001
#define UID_LIST_IDX_FLAG_SINGLE 0x80000000

struct squat_uidlist_header {
	uint32_t uidvalidity; // FIXME
	uint32_t header_size;
	uint32_t used_file_size;
	uint32_t deleted_space;

	uint32_t uid_max;
	uint32_t uid_count;
	uint32_t uids_expunged;
	uint32_t node_count;
};

struct uid_node {
	struct uid_node *prev;
	uint32_t uid;
};

struct squat_uidlist_get_context {
	struct squat_uidlist *uidlist;

	ARRAY_TYPE(seq_range) *result;

	uint32_t filter_pos;
};

struct squat_uidlist {
	struct squat_trie *trie;

	char *filepath;
	int fd;
	struct ostream *output;

	void *mmap_base;
	size_t mmap_size;
	struct squat_uidlist_header hdr;

	ARRAY_DEFINE(lists, struct uid_node);
	uint32_t first_new_list_idx;

	pool_t node_pool;
	buffer_t *tmp_buf, *list_buf;

	unsigned int check_expunges:1;
	unsigned int write_failed:1;
};

struct squat_uidlist_compress_ctx {
	struct squat_uidlist *uidlist;
	const ARRAY_TYPE(seq_range) *existing_uids;

	struct ostream *output;
	char *tmp_path;

	pool_t node_pool;
	struct uid_node *last_node;
	ARRAY_TYPE(seq_range) seen_uids;

	struct squat_uidlist_header hdr;

	unsigned int seen_expunged:1;
	unsigned int failed:1;
};

static void
squat_uidlist_set_syscall_error(struct squat_uidlist *uidlist,
				const char *function)
{
	i_error("%s failed with index search uidlist file %s: %m",
		function, uidlist->filepath);
}

static int squat_uidlist_map(struct squat_uidlist *uidlist)
{
	struct stat st;

	if (fstat(uidlist->fd, &st) < 0) {
		squat_uidlist_set_syscall_error(uidlist, "fstat()");
		return -1;
	}

	if (st.st_size <= sizeof(uidlist->hdr)) {
		memset(&uidlist->hdr, 0, sizeof(uidlist->hdr));
		uidlist->hdr.header_size = sizeof(uidlist->hdr);
		uidlist->hdr.used_file_size = sizeof(uidlist->hdr);
		return 0;
	}

	if (uidlist->mmap_base != NULL) {
		if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
			squat_uidlist_set_syscall_error(uidlist, "munmap()");
	}
	uidlist->mmap_size = st.st_size;

	uidlist->mmap_base =
		mmap(NULL, uidlist->mmap_size, PROT_READ | PROT_WRITE,
		     MAP_SHARED, uidlist->fd, 0);
	if (uidlist->mmap_base == MAP_FAILED) {
		uidlist->mmap_size = 0;
		uidlist->mmap_base = NULL;
		squat_uidlist_set_syscall_error(uidlist, "mmap()");
		return -1;
	}

	memcpy(&uidlist->hdr, uidlist->mmap_base, sizeof(uidlist->hdr));
	// FIXME: verify header

	if (uidlist->hdr.uids_expunged)
		uidlist->check_expunges = TRUE;

	uidlist->first_new_list_idx = uidlist->mmap_size;
	return 1;
}

static int squat_uidlist_open(struct squat_uidlist *uidlist)
{
	i_assert(uidlist->fd == -1);

	uidlist->fd = open(uidlist->filepath, O_RDWR | O_CREAT, 0600);
	if (uidlist->fd == -1) {
		squat_uidlist_set_syscall_error(uidlist, "open()");
		return -1;
	}

	return squat_uidlist_map(uidlist);
}

static void squat_uidlist_close(struct squat_uidlist *uidlist)
{
	if (uidlist->mmap_base != NULL) {
		if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
			squat_uidlist_set_syscall_error(uidlist, "munmap()");
		uidlist->mmap_base = NULL;
	}
	uidlist->mmap_size = 0;

	if (uidlist->fd != -1) {
		if (close(uidlist->fd) < 0)
			squat_uidlist_set_syscall_error(uidlist, "close()");
		uidlist->fd = -1;
	}
}

struct squat_uidlist *
squat_uidlist_init(struct squat_trie *trie, const char *path)
{
	struct squat_uidlist *uidlist;

	uidlist = i_new(struct squat_uidlist, 1);
	uidlist->trie = trie;
	uidlist->filepath = i_strdup(path);
	uidlist->fd = -1;
	uidlist->first_new_list_idx = 1;
	i_array_init(&uidlist->lists, 65536);
	uidlist->node_pool =
		pool_alloconly_create("squat uidlist node pool", 65536);
	uidlist->tmp_buf = buffer_create_dynamic(default_pool, 16);
	uidlist->list_buf = buffer_create_dynamic(default_pool, 256);
	(void)squat_uidlist_open(uidlist);
	return uidlist;
}

void squat_uidlist_deinit(struct squat_uidlist *uidlist)
{
	squat_uidlist_close(uidlist);

	pool_unref(uidlist->node_pool);
	array_free(&uidlist->lists);
	buffer_free(uidlist->tmp_buf);
	buffer_free(uidlist->list_buf);
	i_free(uidlist);
}

int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *_uid_list_idx,
		      uint32_t uid)
{
	uint32_t uid_list_idx = *_uid_list_idx;
	struct uid_node *node, *old_node;

	i_assert(uid >= uidlist->hdr.uid_max);

	if (uid_list_idx == 0) {
		*_uid_list_idx = uid | UID_LIST_IDX_FLAG_SINGLE;
		return 0;
	}

	if (uid > uidlist->hdr.uid_max) {
		uidlist->hdr.uid_max = uid;
		uidlist->hdr.uid_count++;
	}

	if (uid_list_idx < uidlist->first_new_list_idx) {
		/* continue an existing list in the uidlist file */
		old_node = POINTER_CAST((uid_list_idx << 1) |
					UID_NODE_PREV_FLAG_OLD);
		uid_list_idx = uidlist->first_new_list_idx +
			array_count(&uidlist->lists);
		node = array_append_space(&uidlist->lists);

		uidlist->hdr.node_count++;
	} else if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
		uint32_t old_uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;

		if (uid == old_uid) {
			/* trying to add the same uid again */
			return 0;
		}

		/* convert single UID to a list */
		old_node = p_new(uidlist->node_pool, struct uid_node, 1);
		old_node->uid = old_uid;

		uid_list_idx = uidlist->first_new_list_idx +
			array_count(&uidlist->lists);
		node = array_append_space(&uidlist->lists);

		uidlist->hdr.node_count++;
	} else {
		/* update an in-memory list */
		uint32_t arr_idx = uid_list_idx - uidlist->first_new_list_idx;
		if (arr_idx >= array_count(&uidlist->lists)) {
			/* broken */
			squat_trie_set_corrupted(uidlist->trie,
				"corrupted uidlist index (adding)");
			return -1;
		}

		node = array_idx_modifiable(&uidlist->lists, arr_idx);
		if (node->uid == uid) {
			/* trying to add the same uid again */
			return 0;
		}

		old_node = p_new(uidlist->node_pool, struct uid_node, 1);
		*old_node = *node;
	}

	node->prev = old_node;
	node->uid = uid;
	*_uid_list_idx = uid_list_idx;
	return 0;
}

static int
squat_uidlist_copy_existing(struct squat_uidlist *uidlist,  size_t offset,
			    uint32_t *prev_uid_r, uint32_t *written_uid_r)
{
	const uint8_t *data, *data_start, *end, *p = NULL;
	uint32_t size, num, prev_uid, next_uid;

	if (offset >= uidlist->mmap_size)
		return -1;

	data = CONST_PTR_OFFSET(uidlist->mmap_base, offset);
	end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);

	size = _squat_trie_unpack_num(&data, end);
	if (data + size > end)
		return -1;

	data_start = data;
	end = data + size;

	prev_uid = next_uid = _squat_trie_unpack_num(&data, end);
	p = data;
	while (data != end) {
		num = _squat_trie_unpack_num(&data, end);
		next_uid = prev_uid + (num >> 1) + 1;

		if ((num & 1) != 0) {
			/* prev_uid..next_uid */
			if (data == end) {
				/* try to increase this range */
				break;
			}

			/* beginning a new uid/range */
			num = _squat_trie_unpack_num(&data, end);
			next_uid += num + 1;

			prev_uid = next_uid;
			p = data;
		}

		prev_uid = next_uid;
		p = data;
	}

	*written_uid_r = prev_uid;
	*prev_uid_r = next_uid;

	uidlist->hdr.deleted_space +=
		(end - (const uint8_t *)uidlist->mmap_base) - offset;

	buffer_append(uidlist->list_buf, data_start, p - data_start);
	return 0;
}

static int
squat_uidlist_write_range(struct squat_uidlist *uidlist,
			  const struct uid_node *node,
			  uint32_t *prev_uid_r, uint32_t *written_uid_r,
			  int level)
{
	buffer_t *buffer = uidlist->list_buf;
	uint32_t written_uid, prev_uid;
	uint32_t prev_idx = POINTER_CAST_TO(node->prev, uint32_t);

	*prev_uid_r = node->uid;

	if (node->prev == NULL) {
		/* first UID */
		_squat_trie_pack_num(buffer, node->uid);
	} else {
		if ((prev_idx & UID_NODE_PREV_FLAG_OLD) != 0) {
			prev_idx >>= 1;
			if (squat_uidlist_copy_existing(uidlist, prev_idx,
							&prev_uid,
							&written_uid) < 0 ||
			    prev_uid >= node->uid) {
				squat_trie_set_corrupted(uidlist->trie,
					"corrupted continued uidlist index");
				return -1;
			}
		} else {
			if (squat_uidlist_write_range(uidlist, node->prev,
						      &prev_uid, &written_uid,
						      level+1) < 0)
				return -1;
		}

		/* prev_uid contains the previous node's UID.
		   written_uid contains the last written UID. */
		if (prev_uid + 1 == node->uid) {
			if (level != 0) {
				/* this node continue the range */
				*written_uid_r = written_uid;
				return 0;
			} else {
				/* finishing range */
				_squat_trie_pack_num(buffer, 1 |
					((node->uid - written_uid - 1) << 1));
				return 0;
			}
		}
		i_assert(prev_uid < node->uid);
		if (written_uid != prev_uid) {
			i_assert(written_uid < prev_uid);

			/* range ends at prev_uid */
			_squat_trie_pack_num(buffer, 1 |
				((prev_uid - written_uid - 1) << 1));
			/* next uid/range */
			_squat_trie_pack_num(buffer, node->uid - prev_uid - 1);
		} else {
			/* no range */
			_squat_trie_pack_num(buffer,
					     ((node->uid - prev_uid - 1) << 1));
		}
	}

	*written_uid_r = node->uid;
	return 0;
}

static void squat_uidlist_write_init(struct squat_uidlist *uidlist)
{
	i_assert(uidlist->output == NULL);

	uidlist->output = o_stream_create_file(uidlist->fd, default_pool,
					       0, FALSE);
	if (uidlist->hdr.used_file_size < sizeof(uidlist->hdr)) {
		/* creating a new file, write a dummy header */
		o_stream_seek(uidlist->output, 0);
		o_stream_send(uidlist->output, &uidlist->hdr,
			      sizeof(uidlist->hdr));
	} else {
		o_stream_seek(uidlist->output,
			      uidlist->hdr.used_file_size);
	}
}

static int squat_uidlist_write_listbuf(struct squat_uidlist *uidlist,
				       struct ostream *output)
{
	/* write size + buffer */
	buffer_set_used_size(uidlist->tmp_buf, 0);
	_squat_trie_pack_num(uidlist->tmp_buf, uidlist->list_buf->used);

	if (o_stream_send(output, uidlist->tmp_buf->data,
			  uidlist->tmp_buf->used) < 0 ||
	    o_stream_send(output, uidlist->list_buf->data,
			  uidlist->list_buf->used) < 0) {
		return -1;
	}
	return 0;
}

int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
			      uint32_t *_uid_list_idx)
{
	uint32_t uid_list_idx = *_uid_list_idx;
	const struct uid_node *node;
	uint32_t prev_uid, written_uid;

	if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
		/* this is a single UID "list" */
		return 0;
	}
	if (uid_list_idx < uidlist->first_new_list_idx) {
		/* the list hasn't changed */
		return 0;
	}

	uid_list_idx -= uidlist->first_new_list_idx;
	if (uid_list_idx >= array_count(&uidlist->lists)) {
		/* broken */
		squat_trie_set_corrupted(uidlist->trie,
					 "corrupted uidlist index (finishing)");
		return -1;
	}

	/* write the uidlist into a buffer */
	node = array_idx(&uidlist->lists, uid_list_idx);
	buffer_set_used_size(uidlist->list_buf, 0);
	if (squat_uidlist_write_range(uidlist, node,
				      &prev_uid, &written_uid, 0) < 0) {
		uidlist->write_failed = TRUE;
		return -1;
	}

	if (uidlist->output == NULL)
		squat_uidlist_write_init(uidlist);

	/* new uidlist index is the offset in uidlist file */
	*_uid_list_idx = uidlist->output->offset;

	if (squat_uidlist_write_listbuf(uidlist, uidlist->output) < 0)
		uidlist->write_failed = TRUE;
	return 0;
}

static void squat_uidlist_write_header(struct squat_uidlist *uidlist)
{
	uidlist->hdr.used_file_size = uidlist->output->offset;

	o_stream_seek(uidlist->output, 0);
	o_stream_send(uidlist->output, &uidlist->hdr, sizeof(uidlist->hdr));
}

int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity)
{
	int ret = uidlist->write_failed ? -1 : 0;

	if (uidlist->output != NULL) {
		if (ret == 0) {
			uidlist->hdr.uidvalidity = uid_validity;
			squat_uidlist_write_header(uidlist);
		}
		o_stream_destroy(&uidlist->output);
	}

	array_clear(&uidlist->lists);
	p_clear(uidlist->node_pool);

	uidlist->write_failed = FALSE;

	(void)squat_uidlist_map(uidlist);
	return ret;
}

bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
				 unsigned int current_message_count)
{
	uint32_t max_del_space, max_uid_del_count;

	if (uidlist->hdr.used_file_size >= UIDLIST_COMPRESS_MIN_SIZE) {
		/* see if we've reached the max. deleted space in file */
		max_del_space = uidlist->hdr.used_file_size / 100 *
			UIDLIST_COMPRESS_PERCENTAGE;
		if (uidlist->hdr.deleted_space > max_del_space)
			return TRUE;
	}
	if (uidlist->hdr.uid_count > current_message_count) {
		if (current_message_count == 0)
			return TRUE;

		max_uid_del_count = uidlist->hdr.uid_count *
			UIDLIST_UID_COMPRESS_PERCENTAGE / 100;
		if ((uidlist->hdr.uid_count - current_message_count) >
		    max_uid_del_count)
			return TRUE;
	}
	return FALSE;
}

int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
				       bool update_disk)
{
	uidlist->check_expunges = TRUE;

	if (update_disk) {
		uidlist->hdr.uids_expunged = TRUE;

		// FIXME: make sure uidlist.hdr is in updated state
		if (pwrite_full(uidlist->fd, &uidlist->hdr,
				sizeof(uidlist->hdr), 0) < 0) {
			squat_uidlist_set_syscall_error(uidlist,
							"pwrite_full()");
			return -1;
		}
	}
	return 0;
}

struct squat_uidlist_compress_ctx *
squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
			     const ARRAY_TYPE(seq_range) *existing_uids)
{
	struct squat_uidlist_compress_ctx *ctx;
	int fd;

	ctx = i_new(struct squat_uidlist_compress_ctx, 1);
	ctx->uidlist = uidlist;
	ctx->tmp_path = i_strconcat(uidlist->filepath, ".tmp", NULL);

	if (existing_uids != NULL) {
		ctx->node_pool = pool_alloconly_create("compress node pool",
						       1024);
		ctx->existing_uids = existing_uids;
		i_array_init(&ctx->seen_uids,
			     I_MIN(128, array_count(existing_uids)));
	}

	fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd == -1) {
		ctx->failed = TRUE;
		i_error("open(%s) failed: %m", ctx->tmp_path);
	} else {
		ctx->output = o_stream_create_file(fd, default_pool, 0, TRUE);
		o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr));
	}
	return ctx;
}

static bool
squat_uidlist_is_expunged(struct squat_uidlist_compress_ctx *ctx, uint32_t uid)
{
	if (ctx->existing_uids == NULL)
		return FALSE;

	return !seq_range_exists(ctx->existing_uids, uid);
}

static void
squat_uidlist_compress_add_uid(struct squat_uidlist_compress_ctx *ctx,
			       uint32_t uid)
{
	struct uid_node *node;

	if (squat_uidlist_is_expunged(ctx, uid)) {
		ctx->seen_expunged = TRUE;
		return;
	}

	if (!seq_range_exists(&ctx->seen_uids, uid)) {
		if (uid > ctx->hdr.uid_max)
			ctx->hdr.uid_max = uid;
		ctx->hdr.uid_count++;
		seq_range_array_add(&ctx->seen_uids, 0, uid);
	}

	node = p_new(ctx->node_pool, struct uid_node, 1);
	node->prev = ctx->last_node;
	node->uid = uid;

	ctx->last_node = node;
}

static int
squat_uidlist_remove_expunged(struct squat_uidlist_compress_ctx *ctx,
			      const uint8_t *data, size_t size,
			      bool *all_expunged_r)
{
	const uint8_t *end;
	uint32_t num, prev_uid, next_uid, written_uid;

	end = data + size;

	p_clear(ctx->node_pool);
	ctx->seen_expunged = FALSE;
	ctx->last_node = NULL;

	prev_uid = _squat_trie_unpack_num(&data, end);
	squat_uidlist_compress_add_uid(ctx, prev_uid);

	while (data != end) {
		num = _squat_trie_unpack_num(&data, end);
		next_uid = prev_uid + (num >> 1) + 1;
		if ((num & 1) != 0) {
			for (prev_uid++; prev_uid <= next_uid; prev_uid++)
				squat_uidlist_compress_add_uid(ctx, prev_uid);

			if (data == end)
				break;
			num = _squat_trie_unpack_num(&data, end);
			next_uid += num + 1;
		}
		squat_uidlist_compress_add_uid(ctx, next_uid);
		prev_uid = next_uid;
	}

	if (!ctx->seen_expunged) {
		/* no changes */
		return 0;
	}
	if (ctx->last_node == NULL) {
		/* everything expunged */
		*all_expunged_r = TRUE;
		return 1;
	}

	/* recreate the list and write it */
	buffer_set_used_size(ctx->uidlist->list_buf, 0);
	if (squat_uidlist_write_range(ctx->uidlist, ctx->last_node,
				      &prev_uid, &written_uid, 0) < 0)
		return -1;
	if (squat_uidlist_write_listbuf(ctx->uidlist, ctx->output) < 0)
		return -1;
	*all_expunged_r = FALSE;
	return 1;
}

int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
				uint32_t *uid_list_idx)
{
	struct squat_uidlist *uidlist = ctx->uidlist;
	const uint8_t *data, *p, *end;
	uint32_t size;
	int ret;

	if ((*uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
		uint32_t uid = *uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;

		if (ctx->uidlist->check_expunges) {
			if (squat_uidlist_is_expunged(ctx, uid))
				return 0;
		}
		return 1;
	}

	if (ctx->output == NULL)
		return -1;

	if (*uid_list_idx >= uidlist->mmap_size) {
		squat_trie_set_corrupted(uidlist->trie,
			"uidlist index points outside file (compressing)");
		return -1;
	}

	data = p = CONST_PTR_OFFSET(uidlist->mmap_base, *uid_list_idx);
	end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);

	size = _squat_trie_unpack_num(&p, end);
	if (data + size > end) {
		squat_trie_set_corrupted(uidlist->trie,
			"corrupted uidlist index (compressing)");
		return -1;
	}

	*uid_list_idx = ctx->output->offset;

	if (!ctx->uidlist->check_expunges)
		ret = 0;
	else {
		bool all_expunged;

		ret = squat_uidlist_remove_expunged(ctx, p, size,
						    &all_expunged);
		if (ret < 0) {
			ctx->failed = TRUE;
			return -1;
		}
		if (ret > 0 && all_expunged)
			return 0;
	}

	if (ret == 0) {
		if (o_stream_send(ctx->output, data, p - data + size) < 0) {
			ctx->failed = TRUE;
			return -1;
		}
	}

	ctx->hdr.node_count++;
	return 1;
}

void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **_ctx)
{
	struct squat_uidlist_compress_ctx *ctx = *_ctx;

	*_ctx = NULL;

	if (ctx->node_pool != NULL)
		pool_unref(ctx->node_pool);
	if (array_is_created(&ctx->seen_uids))
		array_free(&ctx->seen_uids);
	if (ctx->output != NULL) {
		if (ctx->failed)
			(void)unlink(ctx->tmp_path);
		o_stream_destroy(&ctx->output);
	}
	i_free(ctx->tmp_path);
	i_free(ctx);
}

int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **_ctx)
{
	struct squat_uidlist_compress_ctx *ctx = *_ctx;
	int ret = 0;

	if (ctx->failed) {
		squat_uidlist_compress_rollback(_ctx);
		return -1;
	}

	/* write the header */
	ctx->hdr.uidvalidity = ctx->uidlist->hdr.uidvalidity;
	ctx->hdr.header_size = sizeof(ctx->hdr);
	ctx->hdr.used_file_size = ctx->output->offset;

	if (ctx->existing_uids == NULL) {
		ctx->hdr.uid_max = ctx->uidlist->hdr.uid_max;
		ctx->hdr.uid_count = ctx->uidlist->hdr.uid_count;
	}

	o_stream_seek(ctx->output, 0);
	if (o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr)) < 0)
		ret = -1;

	if (ret == 0) {
		if (rename(ctx->tmp_path, ctx->uidlist->filepath) < 0) {
			i_error("rename(%s, %s) failed: %m",
				ctx->tmp_path, ctx->uidlist->filepath);
			ret = -1;
		} else {
			/* reopen */
			ctx->uidlist->check_expunges = FALSE;
			squat_uidlist_close(ctx->uidlist);
			(void)squat_uidlist_open(ctx->uidlist);
		}
	}

	if (ret < 0)
		ctx->failed = TRUE;

	squat_uidlist_compress_rollback(_ctx);
	return ret;
}

static void
squat_uidlist_get_add_uid(struct squat_uidlist_get_context *ctx, uint32_t uid)
{
	if (ctx->filter_pos == 0) {
		seq_range_array_add(ctx->result, 0, uid);
		return;
	}

	for (; ctx->filter_pos < uid; ctx->filter_pos++)
		seq_range_array_remove(ctx->result, ctx->filter_pos);
	ctx->filter_pos++;
}

static int
squat_uidlist_get_range_list(struct squat_uidlist_get_context *ctx,
			     size_t offset)
{
	const uint8_t *data, *end;
	uint32_t size, num, prev_uid, next_uid;

	if (offset >= ctx->uidlist->mmap_size)
		return -1;

	data = CONST_PTR_OFFSET(ctx->uidlist->mmap_base, offset);
	end = CONST_PTR_OFFSET(ctx->uidlist->mmap_base,
			       ctx->uidlist->mmap_size);

	size = _squat_trie_unpack_num(&data, end);
	if (data + size > end)
		return -1;

	end = data + size;

	prev_uid = _squat_trie_unpack_num(&data, end);
	squat_uidlist_get_add_uid(ctx, prev_uid);

	while (data != end) {
		num = _squat_trie_unpack_num(&data, end);
		next_uid = prev_uid + (num >> 1) + 1;
		if ((num & 1) != 0) {
			for (prev_uid++; prev_uid <= next_uid; prev_uid++)
				squat_uidlist_get_add_uid(ctx, prev_uid);

			if (data == end)
				break;
			num = _squat_trie_unpack_num(&data, end);
			next_uid += num + 1;
		}
		squat_uidlist_get_add_uid(ctx, next_uid);
		prev_uid = next_uid;
	}
	return 0;
}

static int
squat_uidlist_get_ctx(struct squat_uidlist_get_context *ctx,
		      uint32_t uid_list_idx)
{
	if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
		uint32_t uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
		squat_uidlist_get_add_uid(ctx, uid);
		return 0;
	}

	return squat_uidlist_get_range_list(ctx, uid_list_idx);
}

int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
		      ARRAY_TYPE(seq_range) *result)
{
	struct squat_uidlist_get_context ctx;

	memset(&ctx, 0, sizeof(ctx));
	ctx.uidlist = uidlist;
	ctx.result = result;

	return squat_uidlist_get_ctx(&ctx, uid_list_idx);
}

int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
			 ARRAY_TYPE(seq_range) *result)
{
	struct squat_uidlist_get_context ctx;
	const struct seq_range *range;
	unsigned int count;

	memset(&ctx, 0, sizeof(ctx));
	ctx.uidlist = uidlist;
	ctx.result = result;
	ctx.filter_pos = 1;

	return squat_uidlist_get_ctx(&ctx, uid_list_idx);

	range = array_get(ctx.result, &count);
	if (count > 0) {
		for (; ctx.filter_pos <= range[count-1].seq2; ctx.filter_pos++)
			seq_range_array_remove(result, ctx.filter_pos);
	}
}

size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
			      unsigned int *count_r)
{
	*count_r = uidlist->hdr.node_count;

	return uidlist->hdr.used_file_size;
}

--- NEW FILE: squat-uidlist.h ---
#ifndef __SQUAT_UIDLIST_H
#define __SQUAT_UIDLIST_H

#include "seq-range-array.h"

struct squat_trie;
struct squat_uidlist;

struct squat_uidlist *
squat_uidlist_init(struct squat_trie *trie, const char *path);
void squat_uidlist_deinit(struct squat_uidlist *uidlist);

/* Add new UID to given UID list. The uid_list_idx is updated to contain the
   new list index. It must be put through _finish_list() before it's actually
   written to disk. */
int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *uid_list_idx,
		      uint32_t uid);
/* Write UID list into disk. The uid_list_idx is updated to contain the new
   permanent index for it. */
int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
			      uint32_t *uid_list_idx);
int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity);
/* Returns TRUE if uidlist should be compressed. current_message_count can be
   (unsigned int)-1 if you don't want include it in the check. */
bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
				 unsigned int current_message_count);
/* Mark the uidlist containing expunged messages. update_disk=FALSE should be
   done when the uidlist is going to be compressed and this function only tells
   the compression to check for the expunged messages. */
int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
				       bool update_disk);

/* Compress the uidlist file. existing_uids may be NULL if they're not known. */
struct squat_uidlist_compress_ctx *
squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
			     const ARRAY_TYPE(seq_range) *existing_uids);
int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
				uint32_t *uid_list_idx);
void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **ctx);
int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **ctx);

/* Returns UIDs for a given UID list index. */
int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
		      ARRAY_TYPE(seq_range) *result);
/* Filter out UIDs which don't appear in the given UID list from the given
   result array */
int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
			 ARRAY_TYPE(seq_range) *result);

size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
			      unsigned int *count_r);

#endif



More information about the dovecot-cvs mailing list