[dovecot-cvs] dovecot/src/plugins/fts-squat fts-backend-squat.c, 1.1, 1.2 squat-test.c, 1.1, 1.2 squat-trie.c, 1.1, 1.2 squat-trie.h, 1.1, 1.2 squat-uidlist.c, 1.1, 1.2 squat-uidlist.h, 1.1, 1.2

tss at dovecot.org tss at dovecot.org
Wed Dec 6 15:45:48 UTC 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts-squat
In directory talvi:/tmp/cvs-serv1287/fts-squat

Modified Files:
	fts-backend-squat.c squat-test.c squat-trie.c squat-trie.h 
	squat-uidlist.c squat-uidlist.h 
Log Message:
Fixes. Should be pretty much working now.



Index: fts-backend-squat.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/fts-backend-squat.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-backend-squat.c	1 Dec 2006 21:02:11 -0000	1.1
+++ fts-backend-squat.c	6 Dec 2006 15:45:46 -0000	1.2
@@ -2,7 +2,7 @@
 
 #include "lib.h"
 #include "array.h"
-#include "mail-storage.h"
+#include "mail-storage-private.h"
 #include "mail-search.h"
 #include "squat-trie.h"
 #include "fts-squat-plugin.h"
@@ -12,8 +12,11 @@
 struct squat_fts_backend {
 	struct fts_backend backend;
 	struct squat_trie *trie;
+};
 
-	uint32_t last_uid;
+struct squat_fts_backend_build_context {
+	struct fts_backend_build_context ctx;
+	struct squat_trie_build_context *trie_ctx;
 };
 
 static struct fts_backend *fts_backend_squat_init(struct mailbox *box)
@@ -33,7 +36,8 @@
 	backend = i_new(struct squat_fts_backend, 1);
 	backend->backend = fts_backend_squat;
 	backend->trie =
-		squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL));
+		squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL),
+				storage->lock_method);
 	return &backend->backend;
 }
 
@@ -52,8 +56,7 @@
 	struct squat_fts_backend *backend =
 		(struct squat_fts_backend *)_backend;
 
-	*last_uid_r = backend->last_uid;
-	return 0;
+	return squat_trie_get_last_uid(backend->trie, last_uid_r);
 }
 
 static struct fts_backend_build_context *
@@ -61,38 +64,35 @@
 {
 	struct squat_fts_backend *backend =
 		(struct squat_fts_backend *)_backend;
-	struct fts_backend_build_context *ctx;
-
-	*last_uid_r = backend->last_uid;
+	struct squat_fts_backend_build_context *ctx;
 
-	ctx = i_new(struct fts_backend_build_context, 1);
-	ctx->backend = _backend;
-	return ctx;
+	ctx = i_new(struct squat_fts_backend_build_context, 1);
+	ctx->ctx.backend = _backend;
+	ctx->trie_ctx = squat_trie_build_init(backend->trie, last_uid_r);
+	return &ctx->ctx;
 }
 
 static int
-fts_backend_squat_build_more(struct fts_backend_build_context *ctx,
+fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
 			     uint32_t uid, const unsigned char *data,
 			     size_t size)
 {
-	struct squat_fts_backend *backend =
-		(struct squat_fts_backend *)ctx->backend;
-
-	i_assert(uid >= backend->last_uid);
-	backend->last_uid = uid;
+	struct squat_fts_backend_build_context *ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
 
-	return squat_trie_add(backend->trie, uid, data, size);
+	return squat_trie_build_more(ctx->trie_ctx, uid, data, size);
 }
 
 static int
-fts_backend_squat_build_deinit(struct fts_backend_build_context *ctx)
+fts_backend_squat_build_deinit(struct fts_backend_build_context *_ctx)
 {
-	struct squat_fts_backend *backend =
-		(struct squat_fts_backend *)ctx->backend;
+	struct squat_fts_backend_build_context *ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
+	int ret;
 
-	squat_trie_flush(backend->trie);
+	ret = squat_trie_build_deinit(ctx->trie_ctx);
 	i_free(ctx);
-	return 0;
+	return ret;
 }
 
 static void
@@ -152,6 +152,22 @@
 	t_pop();
 }
 
+static int fts_backend_squat_lock(struct fts_backend *_backend)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	return squat_trie_lock(backend->trie, F_RDLCK);
+}
+
+static void fts_backend_squat_unlock(struct fts_backend *_backend)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	squat_trie_unlock(backend->trie);
+}
+
 static int
 fts_backend_squat_lookup(struct fts_backend *_backend, const char *key,
 			 ARRAY_TYPE(seq_range) *result)
@@ -185,6 +201,8 @@
 		fts_backend_squat_build_deinit,
 		fts_backend_squat_expunge,
 		fts_backend_squat_expunge_finish,
+		fts_backend_squat_lock,
+		fts_backend_squat_unlock,
 		fts_backend_squat_lookup,
 		fts_backend_squat_filter
 	}

Index: squat-test.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-test.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- squat-test.c	1 Dec 2006 21:02:11 -0000	1.1
+++ squat-test.c	6 Dec 2006 15:45:46 -0000	1.2
@@ -2,6 +2,7 @@
 
 #include "lib.h"
 #include "array.h"
+#include "file-lock.h"
 #include "istream.h"
 #include "squat-trie.h"
 #include "squat-uidlist.h"
@@ -31,12 +32,14 @@
 int main(int argc __attr_unused__, char *argv[])
 {
 	struct squat_trie *trie;
+	struct squat_trie_build_context *build_ctx;
 	struct istream *input;
 	ARRAY_TYPE(seq_range) result;
 	char *line, *str, buf[4096];
 	int fd;
 	ssize_t ret;
 	unsigned int last = 0, seq = 0, leaves, uid_lists_mem, uid_lists_count;
+	uint32_t last_uid;
 	size_t mem;
 	clock_t clock_start, clock_end;
 	struct timeval tv_start, tv_end;
@@ -45,7 +48,8 @@
 	lib_init();
 	(void)unlink("/tmp/squat-test-index.search");
 	(void)unlink("/tmp/squat-test-index.search.uids");
-	trie = squat_trie_open("/tmp/squat-test-index.search");
+	trie = squat_trie_open("/tmp/squat-test-index.search",
+			       FILE_LOCK_METHOD_FCNTL);
 
 	clock_start = clock();
 	gettimeofday(&tv_start, NULL);
@@ -54,6 +58,7 @@
 	if (fd == -1)
 		return 1;
 
+	build_ctx = squat_trie_build_init(trie, &last_uid);
 	input = i_stream_create_file(fd, default_pool, 0, FALSE);
 	while ((line = i_stream_read_next_line(input)) != NULL) {
 		if (last != input->v_offset/(1024*100)) {
@@ -66,10 +71,11 @@
 			continue;
 		}
 
-		if (squat_trie_add(trie, seq, line, strlen(line)) < 0)
+		if (squat_trie_build_more(build_ctx, seq,
+					  line, strlen(line)) < 0)
 			break;
 	}
-	squat_trie_flush(trie);
+	squat_trie_build_deinit(build_ctx);
 
 	clock_end = clock();
 	gettimeofday(&tv_end, NULL);

Index: squat-trie.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-trie.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- squat-trie.c	1 Dec 2006 21:02:11 -0000	1.1
+++ squat-trie.c	6 Dec 2006 15:45:46 -0000	1.2
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "array.h"
 #include "bsearch-insert-pos.h"
+#include "file-lock.h"
 #include "istream.h"
 #include "ostream.h"
 #include "mmap-util.h"
@@ -15,14 +16,17 @@
 #include <fcntl.h>
 #include <ctype.h>
 
+#define TRIE_COMPRESS_PERCENTAGE 30
+#define TRIE_COMPRESS_MIN_SIZE (1024*50)
+
+#define SQUAT_TRIE_VERSION 1
+#define SQUAT_TRIE_LOCK_TIMEOUT 60
+
 /* for non-x86 use memcpy() when accessing unaligned int* addresses */
 #if defined(__i386__) || defined(__x86_64__)
 #  define ALLOW_UNALIGNED_ACCESS
 #endif
 
-#define TRIE_COMPRESS_PERCENTAGE 30
-#define TRIE_COMPRESS_MIN_SIZE (1024*50)
-
 #define BLOCK_SIZE 4
 
 #define ALIGN(size) \
@@ -34,6 +38,11 @@
 	dev_t dev;
 	ino_t ino;
 
+	enum file_lock_method lock_method;
+	struct file_lock *file_lock;
+	int lock_count;
+	int lock_type; /* F_RDLCK / F_WRLCK */
+
 	void *mmap_base;
 	size_t mmap_size;
 
@@ -42,22 +51,42 @@
 	struct squat_uidlist *uidlist;
 	struct trie_node *root;
 	buffer_t *buf;
-	unsigned int deleted_space;
+
+	unsigned int corrupted:1;
+};
+
+struct squat_trie_build_context {
+	struct squat_trie *trie;
+
 	struct ostream *output;
-	struct squat_uidlist_compress_ctx *compress_ctx;
 
 	uint32_t prev_uid;
 	unsigned int prev_added_size;
 	uint16_t prev_added[BLOCK_SIZE-1];
 
 	unsigned int node_count;
+	unsigned int deleted_space;
 
-	unsigned int corrupted:1;
+	unsigned int failed:1;
+	unsigned int locked:1;
+};
+
+struct squat_trie_compress_context {
+	struct squat_trie *trie;
+
+	const char *tmp_path;
+	struct ostream *output;
+
+	struct squat_uidlist_compress_ctx *uidlist_ctx;
+
+	unsigned int node_count;
 };
 
 struct squat_trie_header {
+	uint8_t version;
+	uint8_t unused[3];
+
 	uint32_t uidvalidity;
-	uint32_t header_size;
 	uint32_t used_file_size;
 	uint32_t deleted_space;
 	uint32_t node_count;
@@ -113,10 +142,11 @@
 		 ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count)))
 
 static int
-squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node,
-			 unsigned int level);
-static int trie_write_node(struct squat_trie *trie, unsigned int level,
-			   struct trie_node *node);
+squat_trie_compress_node(struct squat_trie_compress_context *ctx,
+			 struct trie_node *node, unsigned int level);
+static int trie_write_node(struct squat_trie_build_context *ctx,
+			   unsigned int level, struct trie_node *node);
+static int squat_trie_build_flush(struct squat_trie_build_context *ctx);
 
 static int chr_8bit_cmp(const void *_key, const void *_chr)
 {
@@ -330,6 +360,9 @@
 
 static void trie_close(struct squat_trie *trie)
 {
+	if (trie->file_lock != NULL)
+		file_lock_free(&trie->file_lock);
+
 	if (trie->mmap_base != NULL) {
 		if (munmap(trie->mmap_base, trie->mmap_size) < 0)
 			squat_trie_set_syscall_error(trie, "munmap()");
@@ -347,10 +380,31 @@
 	trie->corrupted = FALSE;
 }
 
+static int trie_map_check_header(struct squat_trie *trie,
+				 const struct squat_trie_header *hdr)
+{
+	if (hdr->version != SQUAT_TRIE_VERSION)
+		return -1;
+
+	if (hdr->used_file_size > trie->mmap_size) {
+		squat_trie_set_corrupted(trie, "used_file_size too large");
+		return -1;
+	}
+	if (hdr->root_offset > trie->mmap_size ||
+	    hdr->root_offset < sizeof(*hdr)) {
+		squat_trie_set_corrupted(trie, "invalid root_offset");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int trie_map(struct squat_trie *trie)
 {
 	struct stat st;
 
+	i_assert(trie->lock_count > 0);
+
 	if (fstat(trie->fd, &st) < 0) {
 		squat_trie_set_syscall_error(trie, "fstat()");
 		return -1;
@@ -374,11 +428,13 @@
 	}
 
 	trie->hdr = trie->mmap_base;
+	if (trie_map_check_header(trie, trie->hdr) < 0)
+		return -1;
+
 	if (trie_map_node(trie, trie->hdr->root_offset, 1, &trie->root) < 0) {
 		trie_close(trie);
 		return 0;
 	}
-	trie->node_count = trie->hdr->node_count;
 	return 1;
 }
 
@@ -396,7 +452,8 @@
 	return trie_map(trie);
 }
 
-struct squat_trie *squat_trie_open(const char *path)
+struct squat_trie *
+squat_trie_open(const char *path, enum file_lock_method lock_method)
 {
 	struct squat_trie *trie;
 	const char *uidlist_path;
@@ -404,9 +461,12 @@
 	trie = i_new(struct squat_trie, 1);
 	trie->fd = -1;
 	trie->filepath = i_strdup(path);
+	trie->lock_method = lock_method;
+	trie->buf = buffer_create_dynamic(default_pool, 1024);
+
 	uidlist_path = t_strconcat(path, ".uids", NULL);
 	trie->uidlist = squat_uidlist_init(trie, uidlist_path);
-	trie->buf = buffer_create_dynamic(default_pool, 1024);
+
 	(void)trie_open(trie);
 	return trie;
 }
@@ -419,6 +479,47 @@
 	i_free(trie);
 }
 
+int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *uid_r)
+{
+	return squat_uidlist_get_last_uid(trie->uidlist, uid_r);
+}
+
+int squat_trie_lock(struct squat_trie *trie, int lock_type)
+{
+	int ret;
+
+	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
+
+	if (trie->lock_count > 0) {
+		/* read lock -> write lock would deadlock */
+		i_assert(trie->lock_type == lock_type || lock_type == F_RDLCK);
+
+		trie->lock_count++;
+		return 1;
+	}
+
+	i_assert(trie->file_lock == NULL);
+	ret = file_wait_lock(trie->fd, trie->filepath, lock_type,
+			     trie->lock_method, SQUAT_TRIE_LOCK_TIMEOUT,
+			     &trie->file_lock);
+	if (ret <= 0)
+		return ret;
+
+	trie->lock_count++;
+	trie->lock_type = lock_type;
+	return 1;
+}
+
+void squat_trie_unlock(struct squat_trie *trie)
+{
+	i_assert(trie->lock_count > 0);
+
+	if (--trie->lock_count > 0)
+		return;
+
+	file_unlock(&trie->file_lock);
+}
+
 static struct trie_node *
 node_alloc(uint16_t chr, unsigned int level)
 {
@@ -528,9 +629,11 @@
 }
 
 static int
-trie_insert_node(struct squat_trie *trie, struct trie_node **parent,
+trie_insert_node(struct squat_trie_build_context *ctx,
+		 struct trie_node **parent,
 		 const uint16_t *data, uint32_t uid, unsigned int level)
 {
+	struct squat_trie *trie = ctx->trie;
 	struct trie_node *node = *parent;
 	uint32_t char_idx, idx_base_offset;
 	bool modified = FALSE;
@@ -540,7 +643,7 @@
 		unsigned int count;
 
 		if (node == NULL) {
-			trie->node_count++;
+			ctx->node_count++;
 			node = *parent = node_alloc(*data, level);
 			char_idx = 0;
 			count = 1;
@@ -568,7 +671,7 @@
 		unsigned int count;
 
 		if (node == NULL) {
-			trie->node_count++;
+			ctx->node_count++;
 			node = *parent = node_alloc(*data, level);
 			char_idx = 0;
 			count = 1;
@@ -609,7 +712,7 @@
 					  &children[char_idx]) < 0)
 				return -1;
 		}
-		ret = trie_insert_node(trie, &children[char_idx],
+		ret = trie_insert_node(ctx, &children[char_idx],
 				       data + 1, uid, level + 1);
 		if (ret < 0)
 			return -1;
@@ -705,28 +808,69 @@
 	return TRUE;
 }
 
-int squat_trie_add(struct squat_trie *trie, uint32_t uid,
-		   const void *data, size_t size)
+struct squat_trie_build_context *
+squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r)
+{
+	struct squat_trie_build_context *ctx;
+
+	ctx = i_new(struct squat_trie_build_context, 1);
+	ctx->trie = trie;
+
+	if (squat_trie_lock(trie, F_WRLCK) <= 0)
+		ctx->failed = TRUE;
+	else {
+		ctx->locked = TRUE;
+		ctx->node_count = trie->hdr->node_count;
+
+		if (squat_uidlist_get_last_uid(trie->uidlist, last_uid_r) < 0)
+			ctx->failed = TRUE;
+	}
+
+	if (ctx->failed)
+		*last_uid_r = 0;
+	return ctx;
+}
+
+int squat_trie_build_deinit(struct squat_trie_build_context *ctx)
+{
+	int ret = ctx->failed ? -1 : 0;
+
+	if (ret == 0)
+		ret = squat_trie_build_flush(ctx);
+
+	if (ctx->locked)
+		squat_trie_unlock(ctx->trie);
+
+	i_free(ctx);
+	return ret;
+}
+
+int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid,
+			  const void *data, size_t size)
 {
 	const uint16_t *str;
 	uint16_t buf[(BLOCK_SIZE-1)*2];
 	unsigned int i, tmp_size;
 
+	if (ctx->failed)
+		return -1;
+
 	t_push();
-	str = data_normalize(data, size, trie->buf);
+	str = data_normalize(data, size, ctx->trie->buf);
 
-	if (uid == trie->prev_uid) {
+	if (uid == ctx->prev_uid) {
 		/* @UNSAFE: continue from last block */
-		memcpy(buf, trie->prev_added,
-		       sizeof(buf[0]) * trie->prev_added_size);
+		memcpy(buf, ctx->prev_added,
+		       sizeof(buf[0]) * ctx->prev_added_size);
 		tmp_size = I_MIN(size, BLOCK_SIZE-1);
-		memcpy(buf + trie->prev_added_size, str,
+		memcpy(buf + ctx->prev_added_size, str,
 		       sizeof(buf[0]) * tmp_size);
 
-		tmp_size += trie->prev_added_size;
+		tmp_size += ctx->prev_added_size;
 		for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) {
 			if (block_want_add(buf+i)) {
-				if (trie_insert_node(trie, &trie->root,
+				if (trie_insert_node(ctx,
+						     &ctx->trie->root,
 						     buf + i, uid, 1) < 0) {
 					t_pop();
 					return -1;
@@ -735,9 +879,9 @@
 		}
 
 		if (size < BLOCK_SIZE) {
-			trie->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1);
-			memcpy(trie->prev_added, buf + i,
-			       sizeof(buf[0]) * trie->prev_added_size);
+			ctx->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1);
+			memcpy(ctx->prev_added, buf + i,
+			       sizeof(buf[0]) * ctx->prev_added_size);
 			t_pop();
 			return 0;
 		}
@@ -745,7 +889,7 @@
 
 	for (i = 0; i + BLOCK_SIZE <= size; i++) {
 		if (block_want_add(str+i)) {
-			if (trie_insert_node(trie, &trie->root,
+			if (trie_insert_node(ctx, &ctx->trie->root,
 					     str + i, uid, 1) < 0) {
 				t_pop();
 				return -1;
@@ -753,9 +897,9 @@
 		}
 	}
 
-	trie->prev_added_size = I_MIN(size, BLOCK_SIZE-1);
-	memcpy(trie->prev_added, str + i,
-	       sizeof(trie->prev_added[0]) * trie->prev_added_size);
+	ctx->prev_added_size = I_MIN(size, BLOCK_SIZE-1);
+	memcpy(ctx->prev_added, str + i,
+	       sizeof(ctx->prev_added[0]) * ctx->prev_added_size);
 	t_pop();
 	return 0;
 }
@@ -815,31 +959,32 @@
 	return 0;
 }
 
-static void node_pack_leaf(struct squat_trie *trie, struct trie_node *node)
+static void node_pack_leaf(buffer_t *buf, struct trie_node *node)
 {
 	uint8_t *chars8 = NODE_CHARS8(node);
 	uint16_t *chars16 = NODE_CHARS16(node);
 	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
 	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node);
 
-	buffer_set_used_size(trie->buf, 0);
-	_squat_trie_pack_num(trie->buf, (node->chars_8bit_count << 1) |
+	buffer_set_used_size(buf, 0);
+	_squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
 			     (node->chars_16bit_count > 0 ? 1 : 0));
-	buffer_append(trie->buf, chars8, node->chars_8bit_count);
-	buffer_append(trie->buf, idx8, sizeof(*idx8) * node->chars_8bit_count);
+	buffer_append(buf, chars8, node->chars_8bit_count);
+	buffer_append(buf, idx8, sizeof(*idx8) * node->chars_8bit_count);
 
 	if (node->chars_16bit_count > 0) {
-		_squat_trie_pack_num(trie->buf, node->chars_16bit_count);
-		buffer_append(trie->buf, chars16,
+		_squat_trie_pack_num(buf, node->chars_16bit_count);
+		buffer_append(buf, chars16,
 			      sizeof(*chars16) * node->chars_16bit_count);
-		buffer_append(trie->buf, idx16,
+		buffer_append(buf, idx16,
 			      sizeof(*idx16) * node->chars_16bit_count);
 	}
 }
 
 static int
-trie_write_node_children(struct squat_trie *trie, unsigned int level,
-			 struct trie_node **children, unsigned int count)
+trie_write_node_children(struct squat_trie_build_context *ctx,
+			 unsigned int level, struct trie_node **children,
+			 unsigned int count)
 {
 	unsigned int i;
 	size_t child_idx;
@@ -847,63 +992,66 @@
 	for (i = 0; i < count; i++) {
 		child_idx = POINTER_CAST_TO(children[i], size_t);
 		if ((child_idx & 1) == 0) {
-			if (trie_write_node(trie, level, children[i]) < 0)
+			if (trie_write_node(ctx, level, children[i]) < 0)
 				return -1;
 		}
 	}
 	return 0;
 }
 
-static int trie_write_node(struct squat_trie *trie, unsigned int level,
-			   struct trie_node *node)
+static int trie_write_node(struct squat_trie_build_context *ctx,
+			   unsigned int level, struct trie_node *node)
 {
+	struct squat_trie *trie = ctx->trie;
 	uoff_t offset;
 
 	if (level < BLOCK_SIZE) {
 		struct trie_node **children8 = NODE_CHILDREN8(node);
 		struct trie_node **children16 = NODE_CHILDREN16(node);
 
-		trie_write_node_children(trie, level + 1,
+		trie_write_node_children(ctx, level + 1,
 					 children8, node->chars_8bit_count);
-		trie_write_node_children(trie, level + 1,
+		trie_write_node_children(ctx, level + 1,
 					 children16, node->chars_16bit_count);
 		node_pack(trie->buf, node);
 	} else {
 		if (node_leaf_finish(trie, node) < 0)
 			return -1;
 
-		node_pack_leaf(trie, node);
+		node_pack_leaf(trie->buf, node);
 	}
 
-	offset = trie->output->offset;
+	offset = ctx->output->offset;
 	if ((offset & 1) != 0) {
-		o_stream_send(trie->output, "", 1);
+		o_stream_send(ctx->output, "", 1);
 		offset++;
 	}
 
 	if (node->resized && node->orig_size != trie->buf->used) {
 		/* append to end of file. the parent node is written later. */
 		node->file_offset = offset;
-		o_stream_send(trie->output, trie->buf->data, trie->buf->used);
+		o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
 
-		trie->deleted_space += node->orig_size;
+		ctx->deleted_space += node->orig_size;
 	} else if (node->modified) {
 		/* overwrite node's contents */
 		i_assert(node->file_offset != 0);
 		i_assert(trie->buf->used <= node->orig_size);
 
 		/* FIXME: write only the indexes if !node->resized */
-		o_stream_seek(trie->output, node->file_offset);
-		o_stream_send(trie->output, trie->buf->data, trie->buf->used);
-		o_stream_seek(trie->output, offset);
+		o_stream_seek(ctx->output, node->file_offset);
+		o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
+		o_stream_seek(ctx->output, offset);
 
-		trie->deleted_space += trie->buf->used - node->orig_size;
+		ctx->deleted_space += trie->buf->used - node->orig_size;
 	}
 	return 0;
 }
 
-static int trie_nodes_write(struct squat_trie *trie, uint32_t *uidvalidity_r)
+static int
+trie_nodes_write(struct squat_trie_build_context *ctx, uint32_t *uidvalidity_r)
 {
+	struct squat_trie *trie = ctx->trie;
 	struct squat_trie_header hdr;
 
 	if (trie->fd == -1) {
@@ -915,8 +1063,8 @@
 		}
 
 		memset(&hdr, 0, sizeof(hdr));
+		hdr.version = SQUAT_TRIE_VERSION;
 		hdr.uidvalidity = 0; // FIXME
-		hdr.header_size = sizeof(hdr);
 	} else {
 		hdr = *trie->hdr;
 		if (lseek(trie->fd, hdr.used_file_size, SEEK_SET) < 0) {
@@ -925,23 +1073,23 @@
 		}
 	}
 
-	trie->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE);
+	ctx->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE);
 	if (hdr.used_file_size == 0)
-		o_stream_send(trie->output, &hdr, sizeof(hdr));
+		o_stream_send(ctx->output, &hdr, sizeof(hdr));
 
-	trie->deleted_space = 0;
-	if (trie_write_node(trie, 1, trie->root) < 0)
+	ctx->deleted_space = 0;
+	if (trie_write_node(ctx, 1, trie->root) < 0)
 		return -1;
 
 	/* update the header */
 	hdr.root_offset = trie->root->file_offset;
-	hdr.used_file_size = trie->output->offset;
-	hdr.deleted_space += trie->deleted_space;
-	hdr.node_count = trie->node_count;
-	o_stream_seek(trie->output, 0);
-	o_stream_send(trie->output, &hdr, sizeof(hdr));
+	hdr.used_file_size = ctx->output->offset;
+	hdr.deleted_space += ctx->deleted_space;
+	hdr.node_count = ctx->node_count;
+	o_stream_seek(ctx->output, 0);
+	o_stream_send(ctx->output, &hdr, sizeof(hdr));
 
-	o_stream_destroy(&trie->output);
+	o_stream_destroy(&ctx->output);
 	*uidvalidity_r = hdr.uidvalidity;
 	return 0;
 }
@@ -963,8 +1111,9 @@
 					   current_message_count);
 }
 
-int squat_trie_flush(struct squat_trie *trie)
+static int squat_trie_build_flush(struct squat_trie_build_context *ctx)
 {
+	struct squat_trie *trie = ctx->trie;
 	uint32_t uidvalidity;
 
 	if (trie->root == NULL) {
@@ -972,7 +1121,7 @@
 		return 0;
 	}
 
-	if (trie_nodes_write(trie, &uidvalidity) < 0)
+	if (trie_nodes_write(ctx, &uidvalidity) < 0)
 		return -1;
 	if (squat_uidlist_flush(trie->uidlist, uidvalidity) < 0)
 		return -1;
@@ -1031,7 +1180,7 @@
 }
 
 static int
-squat_trie_compress_children(struct squat_trie *trie,
+squat_trie_compress_children(struct squat_trie_compress_context *ctx,
 			     struct trie_node **children, unsigned int count,
 			     unsigned int level)
 {
@@ -1046,10 +1195,10 @@
 		i_assert((child_idx & 1) != 0);
 		child_idx &= ~1;
 
-		if (trie_map_node(trie, child_idx, level, &child_node) < 0)
+		if (trie_map_node(ctx->trie, child_idx, level, &child_node) < 0)
 			return -1;
 
-		ret = squat_trie_compress_node(trie, child_node, level);
+		ret = squat_trie_compress_node(ctx, child_node, level);
 		if (child_node->file_offset != 0)
 			children[i] = POINTER_CAST(child_node->file_offset | 1);
 		else {
@@ -1064,8 +1213,9 @@
 	return need_char_compress ? 0 : 1;
 }
 
-static int squat_trie_compress_leaf_uidlist(struct squat_trie *trie,
-					    struct trie_node *node)
+static int
+squat_trie_compress_leaf_uidlist(struct squat_trie_compress_context *ctx,
+				 struct trie_node *node)
 {
 	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
 	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node);
@@ -1074,7 +1224,7 @@
 	bool compress_chars = FALSE;
 
 	for (i = 0; i < node->chars_8bit_count; i++) {
-		ret = squat_uidlist_compress_next(trie->compress_ctx, &idx8[i]);
+		ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx8[i]);
 		if (ret < 0)
 			return -1;
 		if (ret == 0) {
@@ -1087,8 +1237,7 @@
 		compress_chars = FALSE;
 	}
 	for (i = 0; i < node->chars_16bit_count; i++) {
-		ret = squat_uidlist_compress_next(trie->compress_ctx,
-						  &idx16[i]);
+		ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx16[i]);
 		if (ret < 0)
 			return -1;
 		if (ret == 0) {
@@ -1104,34 +1253,35 @@
 }
 
 static int
-squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node,
-			 unsigned int level)
+squat_trie_compress_node(struct squat_trie_compress_context *ctx,
+			 struct trie_node *node, unsigned int level)
 {
+	struct squat_trie *trie = ctx->trie;
 	int ret;
 
 	if (level == BLOCK_SIZE) {
-		if (squat_trie_compress_leaf_uidlist(trie, node))
+		if (squat_trie_compress_leaf_uidlist(ctx, node))
 			return -1;
 
 		if (node->chars_8bit_count == 0 &&
 		    node->chars_16bit_count == 0) {
 			/* everything expunged */
-			trie->node_count--;
+			ctx->node_count--;
 			node->file_offset = 0;
 			return 0;
 		}
-		node_pack_leaf(trie, node);
+		node_pack_leaf(trie->buf, node);
 	} else {
 		struct trie_node **children8 = NODE_CHILDREN8(node);
 		struct trie_node **children16 = NODE_CHILDREN16(node);
 
-		if ((ret = squat_trie_compress_children(trie, children8,
+		if ((ret = squat_trie_compress_children(ctx, children8,
 							node->chars_8bit_count,
 							level + 1)) < 0)
 			return -1;
 		if (ret == 0)
 			squat_trie_compress_chars8(node);
-		if ((ret = squat_trie_compress_children(trie, children16,
+		if ((ret = squat_trie_compress_children(ctx, children16,
 							node->chars_16bit_count,
 							level + 1)) < 0)
 			return -1;
@@ -1141,80 +1291,104 @@
 		if (node->chars_8bit_count == 0 &&
 		    node->chars_16bit_count == 0) {
 			/* everything expunged */
-			trie->node_count--;
+			ctx->node_count--;
 			node->file_offset = 0;
 			return 0;
 		}
 		node_pack(trie->buf, node);
 	}
 
-	if ((trie->output->offset & 1) != 0)
-		o_stream_send(trie->output, "", 1);
-	node->file_offset = trie->output->offset;
+	if ((ctx->output->offset & 1) != 0)
+		o_stream_send(ctx->output, "", 1);
+	node->file_offset = ctx->output->offset;
 
-	o_stream_send(trie->output, trie->buf->data, trie->buf->used);
+	o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
 	return 0;
 }
 
-int squat_trie_compress(struct squat_trie *trie,
-			const ARRAY_TYPE(seq_range) *existing_uids)
+static int squat_trie_compress_init(struct squat_trie_compress_context *ctx,
+				    struct squat_trie *trie)
 {
-	struct trie_node *node;
 	struct squat_trie_header hdr;
-	const char *tmp_path;
-	int fd, ret;
-
-	if (trie_map_node(trie, trie->hdr->root_offset, 1, &node) < 0)
-		return -1;
+	int fd;
 
-	tmp_path = t_strconcat(trie->filepath, ".tmp", NULL);
-	fd = open(tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+	ctx->tmp_path = t_strconcat(trie->filepath, ".tmp", NULL);
+	fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
 	if (fd == -1) {
-		i_error("open(%s, O_CREAT) failed: %m", tmp_path);
-		i_free(node);
+		i_error("open(%s, O_CREAT) failed: %m", ctx->tmp_path);
 		return -1;
 	}
 
-	i_assert(trie->output == NULL);
-	trie->output = o_stream_create_file(fd, default_pool, 0, TRUE);
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->trie = trie;
+	ctx->output = o_stream_create_file(fd, default_pool, 0, TRUE);
+	ctx->node_count = trie->hdr->node_count;
 
 	/* write a dummy header first */
 	memset(&hdr, 0, sizeof(hdr));
-	o_stream_send(trie->output, &hdr, sizeof(hdr));
+	o_stream_send(ctx->output, &hdr, sizeof(hdr));
+	return 0;
+}
 
-	/* do the compression */
-	trie->compress_ctx =
-		squat_uidlist_compress_begin(trie->uidlist, existing_uids);
-	ret = squat_trie_compress_node(trie, node, 1);
+static void
+squat_trie_compress_write_header(struct squat_trie_compress_context *ctx,
+				 struct trie_node *root_node)
+{
+	struct squat_trie_header hdr;
 
-	/* write the header */
-	hdr.uidvalidity = trie->hdr->uidvalidity;
-	hdr.header_size = sizeof(hdr);
-	hdr.root_offset = node->file_offset;
-	hdr.used_file_size = trie->output->offset;
-	hdr.node_count = trie->node_count;
-	o_stream_seek(trie->output, 0);
-	o_stream_send(trie->output, &hdr, sizeof(hdr));
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.version = SQUAT_TRIE_VERSION;
+	hdr.uidvalidity = ctx->trie->hdr->uidvalidity;
+	hdr.root_offset = root_node->file_offset;
+	hdr.used_file_size = ctx->output->offset;
+	hdr.node_count = ctx->node_count;
 
-	i_free(node);
+	o_stream_seek(ctx->output, 0);
+	o_stream_send(ctx->output, &hdr, sizeof(hdr));
+}
 
-	/* finish the compression */
-	if (ret == 0)
-		ret = squat_uidlist_compress_commit(&trie->compress_ctx);
-	else
-		squat_uidlist_compress_rollback(&trie->compress_ctx);
+int squat_trie_compress(struct squat_trie *trie,
+			const ARRAY_TYPE(seq_range) *existing_uids)
+{
+	struct squat_trie_compress_context ctx;
+	struct trie_node *node;
+	int ret;
+
+	if (squat_trie_lock(trie, F_WRLCK) <= 0)
+		return -1;
+
+	if (squat_trie_compress_init(&ctx, trie) < 0) {
+		squat_trie_unlock(trie);
+		return -1;
+	}
 
+	ret = trie_map_node(trie, trie->hdr->root_offset, 1, &node);
 	if (ret == 0) {
-		if (rename(tmp_path, trie->filepath) < 0) {
+		/* do the compression */
+		ctx.uidlist_ctx = squat_uidlist_compress_begin(trie->uidlist,
+							       existing_uids);
+		if ((ret = squat_trie_compress_node(&ctx, node, 1)) < 0)
+			squat_uidlist_compress_rollback(&ctx.uidlist_ctx);
+		else {
+			ret = squat_uidlist_compress_commit(&ctx.uidlist_ctx);
+
+			squat_trie_compress_write_header(&ctx, node);
+		}
+		i_free(node);
+	}
+
+	if (ret == 0) {
+		if (rename(ctx.tmp_path, trie->filepath) < 0) {
 			i_error("rename(%s, %s) failed: %m",
-				tmp_path, trie->filepath);
+				ctx.tmp_path, trie->filepath);
 			ret = -1;
 		}
 	}
-	o_stream_destroy(&trie->output);
+	o_stream_destroy(&ctx.output);
+	squat_trie_unlock(trie);
 
 	if (ret < 0)
-		(void)unlink(tmp_path);
+		(void)unlink(ctx.tmp_path);
 	else {
 		trie_close(trie);
 		if (trie_open(trie) <= 0)
@@ -1245,17 +1419,37 @@
 	return trie->mmap_size;
 }
 
-int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
-		      const char *str)
+static int squat_trie_lookup_init(struct squat_trie *trie, const char *str,
+				  const uint16_t **data_r, unsigned int *len_r)
 {
 	const uint16_t *data;
 	unsigned int len = strlen(str);
-	uint32_t list;
 
 	if (len < BLOCK_SIZE)
 		return -1;
 
 	data = data_normalize(str, len, trie->buf);
+
+	/* skip the blocks that can't exist */
+	while (!block_want_add(data + len - BLOCK_SIZE)) {
+		if (--len < BLOCK_SIZE)
+			return -1;
+	}
+
+	if (squat_trie_lock(trie, F_RDLCK) <= 0)
+		return -1;
+
+	*data_r = data;
+	*len_r = len;
+	return 0;
+}
+
+static int
+squat_trie_lookup_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+			 const uint16_t *data, unsigned int len)
+{
+	uint32_t list;
+
 	list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1);
 	if (list == 0)
 		return 0;
@@ -1266,6 +1460,10 @@
 	}
 	while (len > BLOCK_SIZE) {
 		len--;
+
+		if (!block_want_add(data + len - BLOCK_SIZE))
+			continue;
+
 		list = trie_lookup_node(trie, trie->root,
 					data + len - BLOCK_SIZE, 1);
 		if (list == 0) {
@@ -1280,18 +1478,31 @@
 	return array_count(result) > 0 ? 1 : 0;
 }
 
-int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
 		      const char *str)
 {
 	const uint16_t *data;
-	unsigned int len = strlen(str);
-	uint32_t list;
+	unsigned int len;
+	int ret;
 
-	if (len < BLOCK_SIZE)
+	if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
 		return -1;
 
-	data = data_normalize(str, len, trie->buf);
-	while (len >= BLOCK_SIZE) {
+	ret = squat_trie_lookup_locked(trie, result, data, len);
+	squat_trie_unlock(trie);
+	return ret;
+}
+
+static int
+squat_trie_filter_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+			 const uint16_t *data, unsigned int len)
+{
+	uint32_t list;
+
+	for (; len >= BLOCK_SIZE; len--) {
+		if (!block_want_add(data + len - BLOCK_SIZE))
+			continue;
+
 		list = trie_lookup_node(trie, trie->root,
 					data + len - BLOCK_SIZE, 1);
 		if (list == 0) {
@@ -1302,11 +1513,24 @@
 			squat_trie_set_corrupted(trie, "uidlist offset broken");
 			return -1;
 		}
-		len--;
 	}
 	return array_count(result) > 0 ? 1 : 0;
 }
 
+int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+		      const char *str)
+{
+	const uint16_t *data;
+	unsigned int len;
+	int ret;
+
+	if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
+		return -1;
+	ret = squat_trie_filter_locked(trie, result, data, len);
+	squat_trie_unlock(trie);
+	return ret;
+}
+
 struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
 {
 	return trie->uidlist;

Index: squat-trie.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-trie.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- squat-trie.h	1 Dec 2006 21:02:11 -0000	1.1
+++ squat-trie.h	6 Dec 2006 15:45:46 -0000	1.2
@@ -1,14 +1,25 @@
 #ifndef __SQUAT_TRIE_H
 #define __SQUAT_TRIE_H
 
+enum file_lock_method;
+
 #include "seq-range-array.h"
 
-struct squat_trie *squat_trie_open(const char *path);
+struct squat_trie *
+squat_trie_open(const char *path, enum file_lock_method lock_method);
 void squat_trie_close(struct squat_trie *trie);
 
-int squat_trie_add(struct squat_trie *trie, uint32_t uid,
-		   const void *data, size_t size);
-int squat_trie_flush(struct squat_trie *trie);
+int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *last_uid_r);
+
+int squat_trie_lock(struct squat_trie *trie, int lock_type);
+void squat_trie_unlock(struct squat_trie *trie);
+
+struct squat_trie_build_context *
+squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r);
+int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid,
+			  const void *data, size_t size);
+int squat_trie_build_deinit(struct squat_trie_build_context *ctx);
+
 int squat_trie_compress(struct squat_trie *trie,
 			const ARRAY_TYPE(seq_range) *existing_uids);
 

Index: squat-uidlist.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-uidlist.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- squat-uidlist.c	1 Dec 2006 21:02:11 -0000	1.1
+++ squat-uidlist.c	6 Dec 2006 15:45:46 -0000	1.2
@@ -28,7 +28,8 @@
 
 	uint32_t uid_max;
 	uint32_t uid_count;
-	uint32_t uids_expunged;
+	uint8_t uids_expunged; /* updated without locking */
+	uint8_t unused[3];
 	uint32_t node_count;
 };
 
@@ -192,6 +193,12 @@
 	i_free(uidlist);
 }
 
+int squat_uidlist_get_last_uid(struct squat_uidlist *uidlist, uint32_t *uid_r)
+{
+	*uid_r = uidlist->hdr.uid_max;
+	return 0;
+}
+
 int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *_uid_list_idx,
 		      uint32_t uid)
 {
@@ -518,14 +525,15 @@
 int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
 				       bool update_disk)
 {
+	uint8_t flag = 1;
+	size_t offset;
+
 	uidlist->check_expunges = TRUE;
 
 	if (update_disk) {
-		uidlist->hdr.uids_expunged = TRUE;
-
-		// FIXME: make sure uidlist.hdr is in updated state
-		if (pwrite_full(uidlist->fd, &uidlist->hdr,
-				sizeof(uidlist->hdr), 0) < 0) {
+		/* NOTE: we're writing this flag without locking */
+		offset = offsetof(struct squat_uidlist_header, uids_expunged);
+		if (pwrite_full(uidlist->fd, &flag, sizeof(flag), offset) < 0) {
 			squat_uidlist_set_syscall_error(uidlist,
 							"pwrite_full()");
 			return -1;

Index: squat-uidlist.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-uidlist.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- squat-uidlist.h	1 Dec 2006 21:02:11 -0000	1.1
+++ squat-uidlist.h	6 Dec 2006 15:45:46 -0000	1.2
@@ -10,6 +10,8 @@
 squat_uidlist_init(struct squat_trie *trie, const char *path);
 void squat_uidlist_deinit(struct squat_uidlist *uidlist);
 
+int squat_uidlist_get_last_uid(struct squat_uidlist *uidlist, uint32_t *uid_r);
+
 /* Add new UID to given UID list. The uid_list_idx is updated to contain the
    new list index. It must be put through _finish_list() before it's actually
    written to disk. */



More information about the dovecot-cvs mailing list