[dovecot-cvs] dovecot/src/plugins/fts-squat .cvsignore, NONE, 1.1 Makefile.am, NONE, 1.1 fts-backend-squat.c, NONE, 1.1 fts-squat-plugin.c, NONE, 1.1 fts-squat-plugin.h, NONE, 1.1 squat-test.c, NONE, 1.1 squat-trie.c, NONE, 1.1 squat-trie.h, NONE, 1.1 squat-uidlist.c, NONE, 1.1 squat-uidlist.h, NONE, 1.1
tss at dovecot.org
tss at dovecot.org
Fri Dec 1 21:02:14 UTC 2006
Update of /var/lib/cvs/dovecot/src/plugins/fts-squat
In directory talvi:/tmp/cvs-serv10351/src/plugins/fts-squat
Added Files:
.cvsignore Makefile.am fts-backend-squat.c fts-squat-plugin.c
fts-squat-plugin.h squat-test.c squat-trie.c squat-trie.h
squat-uidlist.c squat-uidlist.h
Log Message:
Added "squat" full text search indexer backend. Its name and basic ideas
originate to Cyrus's squat index, but Dovecot's implementation is completely
different and it supports incremental updates.
Still a bit broken and lacks locking, but I wanted to get this into CVS now.
--- NEW FILE: .cvsignore ---
*.la
*.lo
*.o
.deps
.libs
Makefile
Makefile.in
so_locations
squat-test
--- NEW FILE: Makefile.am ---
AM_CPPFLAGS = \
-I$(top_srcdir)/src/lib \
-I$(top_srcdir)/src/lib-mail \
-I$(top_srcdir)/src/lib-storage \
-I$(top_srcdir)/src/plugins/fts
lib02_fts_squat_plugin_la_LDFLAGS = -module -avoid-version
module_LTLIBRARIES = \
lib02_fts_squat_plugin.la
lib02_fts_squat_plugin_la_SOURCES = \
fts-squat-plugin.c \
fts-backend-squat.c \
squat-trie.c \
squat-uidlist.c
noinst_HEADERS = \
fts-squat-plugin.h \
squat-trie.h \
squat-uidlist.h
noinst_PROGRAMS = squat-test
squat_test_SOURCES = \
squat-test.c
common_objects = \
squat-trie.lo \
squat-uidlist.lo
libs = \
$(top_builddir)/src/lib-storage/register/libstorage-register.a \
$(STORAGE_LIBS) \
$(top_builddir)/src/lib-storage/libstorage.a \
$(top_builddir)/src/lib-storage/list/libstorage_list.a \
$(top_builddir)/src/lib-imap/libimap.a \
$(top_builddir)/src/lib-mail/libmail.a \
$(top_builddir)/src/lib-charset/libcharset.a \
$(top_builddir)/src/lib/liblib.a
squat_test_LDADD = \
$(common_objects) \
$(libs) \
$(LIBICONV) \
$(RAND_LIBS)
squat_test_DEPENDENCIES = $(libs) $(common_objects)
install-exec-local:
for d in imap lda; do \
$(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
rm -f $(DESTDIR)$(moduledir)/$$d/lib02_fts_squat_plugin.so; \
$(LN_S) ../lib02_fts_squat_plugin.so $(DESTDIR)$(moduledir)/$$d; \
done
--- NEW FILE: fts-backend-squat.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "mail-storage.h"
#include "mail-search.h"
#include "squat-trie.h"
#include "fts-squat-plugin.h"
#define SQUAT_FILE_PREFIX "dovecot.index.search"
struct squat_fts_backend {
struct fts_backend backend;
struct squat_trie *trie;
uint32_t last_uid;
};
static struct fts_backend *fts_backend_squat_init(struct mailbox *box)
{
struct squat_fts_backend *backend;
struct mail_storage *storage;
const char *path;
storage = mailbox_get_storage(box);
path = mail_storage_get_mailbox_index_dir(storage,
mailbox_get_name(box));
if (*path == '\0') {
/* in-memory indexes */
return NULL;
}
backend = i_new(struct squat_fts_backend, 1);
backend->backend = fts_backend_squat;
backend->trie =
squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL));
return &backend->backend;
}
static void fts_backend_squat_deinit(struct fts_backend *_backend)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
squat_trie_close(backend->trie);
i_free(backend);
}
static int fts_backend_squat_get_last_uid(struct fts_backend *_backend,
uint32_t *last_uid_r)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
*last_uid_r = backend->last_uid;
return 0;
}
static struct fts_backend_build_context *
fts_backend_squat_build_init(struct fts_backend *_backend, uint32_t *last_uid_r)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
struct fts_backend_build_context *ctx;
*last_uid_r = backend->last_uid;
ctx = i_new(struct fts_backend_build_context, 1);
ctx->backend = _backend;
return ctx;
}
static int
fts_backend_squat_build_more(struct fts_backend_build_context *ctx,
uint32_t uid, const unsigned char *data,
size_t size)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)ctx->backend;
i_assert(uid >= backend->last_uid);
backend->last_uid = uid;
return squat_trie_add(backend->trie, uid, data, size);
}
static int
fts_backend_squat_build_deinit(struct fts_backend_build_context *ctx)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)ctx->backend;
squat_trie_flush(backend->trie);
i_free(ctx);
return 0;
}
static void
fts_backend_squat_expunge(struct fts_backend *_backend __attr_unused__,
struct mail *mail __attr_unused__)
{
}
static int get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids,
unsigned int *message_count_r)
{
struct mail_search_arg search_arg;
struct mailbox_transaction_context *t;
struct mail_search_context *ctx;
struct mail *mail;
unsigned int count = 0;
int ret;
memset(&search_arg, 0, sizeof(search_arg));
search_arg.type = SEARCH_ALL;
t = mailbox_transaction_begin(box, 0);
ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
mail = mail_alloc(t, 0, NULL);
while (mailbox_search_next(ctx, mail) > 0) {
seq_range_array_add(uids, 0, mail->uid);
count++;
}
mail_free(&mail);
ret = mailbox_search_deinit(&ctx);
mailbox_transaction_rollback(&t);
*message_count_r = count;
return ret;
}
static void
fts_backend_squat_expunge_finish(struct fts_backend *_backend,
struct mailbox *box, bool committed)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
ARRAY_TYPE(seq_range) uids = ARRAY_INIT;
unsigned int count;
if (!committed)
return;
t_push();
t_array_init(&uids, 128);
if (get_uids(box, &uids, &count) == 0) {
(void)squat_trie_mark_having_expunges(backend->trie, &uids,
count);
}
t_pop();
}
static int
fts_backend_squat_lookup(struct fts_backend *_backend, const char *key,
ARRAY_TYPE(seq_range) *result)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
return squat_trie_lookup(backend->trie, result, key);
}
static int
fts_backend_squat_filter(struct fts_backend *_backend, const char *key,
ARRAY_TYPE(seq_range) *result)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)_backend;
return squat_trie_filter(backend->trie, result, key);
}
struct fts_backend fts_backend_squat = {
MEMBER(name) "squat",
MEMBER(definite_lookups) FALSE,
{
fts_backend_squat_init,
fts_backend_squat_deinit,
fts_backend_squat_get_last_uid,
fts_backend_squat_build_init,
fts_backend_squat_build_more,
fts_backend_squat_build_deinit,
fts_backend_squat_expunge,
fts_backend_squat_expunge_finish,
fts_backend_squat_lookup,
fts_backend_squat_filter
}
};
--- NEW FILE: fts-squat-plugin.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "fts-squat-plugin.h"
void fts_squat_plugin_init(void)
{
fts_backend_register(&fts_backend_squat);
}
void fts_squat_plugin_deinit(void)
{
fts_backend_unregister(fts_backend_squat.name);
}
--- NEW FILE: fts-squat-plugin.h ---
#ifndef __FTS_SQUAT_PLUGIN_H
#define __FTS_SQUAT_PLUGIN_H
#include "fts-api-private.h"
extern struct fts_backend fts_backend_squat;
void fts_squat_plugin_init(void);
void fts_squat_plugin_deinit(void);
#endif
--- NEW FILE: squat-test.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "istream.h"
#include "squat-trie.h"
#include "squat-uidlist.h"
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <sys/time.h>
static void result_print(ARRAY_TYPE(seq_range) *result)
{
const struct seq_range *range;
unsigned int i, count;
range = array_get(result, &count);
for (i = 0; i < count; i++) {
if (i != 0)
printf(",");
printf("%u", range[i].seq1);
if (range[i].seq1 != range[i].seq2)
printf("-%u", range[i].seq2);
}
printf("\n");
}
int main(int argc __attr_unused__, char *argv[])
{
struct squat_trie *trie;
struct istream *input;
ARRAY_TYPE(seq_range) result;
char *line, *str, buf[4096];
int fd;
ssize_t ret;
unsigned int last = 0, seq = 0, leaves, uid_lists_mem, uid_lists_count;
size_t mem;
clock_t clock_start, clock_end;
struct timeval tv_start, tv_end;
double cputime;
lib_init();
(void)unlink("/tmp/squat-test-index.search");
(void)unlink("/tmp/squat-test-index.search.uids");
trie = squat_trie_open("/tmp/squat-test-index.search");
clock_start = clock();
gettimeofday(&tv_start, NULL);
fd = open(argv[1], O_RDONLY);
if (fd == -1)
return 1;
input = i_stream_create_file(fd, default_pool, 0, FALSE);
while ((line = i_stream_read_next_line(input)) != NULL) {
if (last != input->v_offset/(1024*100)) {
fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024));
fflush(stderr);
last = input->v_offset/(1024*100);
}
if (strncmp(line, "From ", 5) == 0) {
seq++;
continue;
}
if (squat_trie_add(trie, seq, line, strlen(line)) < 0)
break;
}
squat_trie_flush(trie);
clock_end = clock();
gettimeofday(&tv_end, NULL);
cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
fprintf(stderr, "\n - Index time: %.2f CPU seconds, "
"%.2f real seconds (%.02fMB/CPUs)\n", cputime,
(tv_end.tv_sec - tv_start.tv_sec) +
(tv_end.tv_usec - tv_start.tv_usec)/1000000.0,
input->v_offset / cputime / (1024*1024));
mem = squat_trie_mem_used(trie, &leaves);
uid_lists_mem = squat_uidlist_mem_used(_squat_trie_get_uidlist(trie),
&uid_lists_count);
fprintf(stderr, " - %u bytes in %u nodes (%.02f%%)\n"
" - %u bytes in %u uid_lists (%.02f%%)\n"
" - %u bytes total of %"PRIuUOFF_T" (%.02f%%)\n",
(unsigned)mem, leaves, mem / (float)input->v_offset * 100.0,
uid_lists_mem, uid_lists_count,
uid_lists_mem / (float)input->v_offset * 100.0,
(unsigned)(uid_lists_mem + mem), input->v_offset,
(uid_lists_mem + mem) / (float)input->v_offset * 100.0);
i_stream_unref(&input);
close(fd);
i_array_init(&result, 128);
while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) {
ret = strlen(str)-1;
str[ret] = 0;
array_clear(&result);
gettimeofday(&tv_start, NULL);
if (!squat_trie_lookup(trie, &result, str))
printf("No matches\n");
else {
gettimeofday(&tv_end, NULL);
printf(" - Search took %.05f CPU seconds\n",
(tv_end.tv_sec - tv_start.tv_sec) +
(tv_end.tv_usec - tv_start.tv_usec)/1000000.0);
result_print(&result);
}
}
return 0;
}
--- NEW FILE: squat-trie.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "istream.h"
#include "ostream.h"
#include "mmap-util.h"
#include "squat-uidlist.h"
#include "squat-trie.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
/* for non-x86 use memcpy() when accessing unaligned int* addresses */
#if defined(__i386__) || defined(__x86_64__)
[...1274 lines suppressed...]
while (len >= BLOCK_SIZE) {
list = trie_lookup_node(trie, trie->root,
data + len - BLOCK_SIZE, 1);
if (list == 0) {
array_clear(result);
return 0;
}
if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
squat_trie_set_corrupted(trie, "uidlist offset broken");
return -1;
}
len--;
}
return array_count(result) > 0 ? 1 : 0;
}
struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
{
return trie->uidlist;
}
--- NEW FILE: squat-trie.h ---
#ifndef __SQUAT_TRIE_H
#define __SQUAT_TRIE_H
#include "seq-range-array.h"
struct squat_trie *squat_trie_open(const char *path);
void squat_trie_close(struct squat_trie *trie);
int squat_trie_add(struct squat_trie *trie, uint32_t uid,
const void *data, size_t size);
int squat_trie_flush(struct squat_trie *trie);
int squat_trie_compress(struct squat_trie *trie,
const ARRAY_TYPE(seq_range) *existing_uids);
int squat_trie_mark_having_expunges(struct squat_trie *trie,
const ARRAY_TYPE(seq_range) *existing_uids,
unsigned int current_message_count);
int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
const char *str);
int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
const char *str);
size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r);
struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie);
void _squat_trie_pack_num(buffer_t *buffer, uint32_t num);
uint32_t _squat_trie_unpack_num(const uint8_t **p, const uint8_t *end);
void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason);
#endif
--- NEW FILE: squat-uidlist.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "ostream.h"
#include "mmap-util.h"
#include "write-full.h"
#include "squat-trie.h"
#include "squat-uidlist.h"
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#define UIDLIST_COMPRESS_PERCENTAGE 30
#define UIDLIST_UID_COMPRESS_PERCENTAGE 20
#define UIDLIST_COMPRESS_MIN_SIZE (1024*8)
#define UID_NODE_PREV_FLAG_OLD 0x00000001
#define UID_LIST_IDX_FLAG_SINGLE 0x80000000
struct squat_uidlist_header {
uint32_t uidvalidity; // FIXME
uint32_t header_size;
uint32_t used_file_size;
uint32_t deleted_space;
uint32_t uid_max;
uint32_t uid_count;
uint32_t uids_expunged;
uint32_t node_count;
};
struct uid_node {
struct uid_node *prev;
uint32_t uid;
};
struct squat_uidlist_get_context {
struct squat_uidlist *uidlist;
ARRAY_TYPE(seq_range) *result;
uint32_t filter_pos;
};
struct squat_uidlist {
struct squat_trie *trie;
char *filepath;
int fd;
struct ostream *output;
void *mmap_base;
size_t mmap_size;
struct squat_uidlist_header hdr;
ARRAY_DEFINE(lists, struct uid_node);
uint32_t first_new_list_idx;
pool_t node_pool;
buffer_t *tmp_buf, *list_buf;
unsigned int check_expunges:1;
unsigned int write_failed:1;
};
struct squat_uidlist_compress_ctx {
struct squat_uidlist *uidlist;
const ARRAY_TYPE(seq_range) *existing_uids;
struct ostream *output;
char *tmp_path;
pool_t node_pool;
struct uid_node *last_node;
ARRAY_TYPE(seq_range) seen_uids;
struct squat_uidlist_header hdr;
unsigned int seen_expunged:1;
unsigned int failed:1;
};
static void
squat_uidlist_set_syscall_error(struct squat_uidlist *uidlist,
const char *function)
{
i_error("%s failed with index search uidlist file %s: %m",
function, uidlist->filepath);
}
static int squat_uidlist_map(struct squat_uidlist *uidlist)
{
struct stat st;
if (fstat(uidlist->fd, &st) < 0) {
squat_uidlist_set_syscall_error(uidlist, "fstat()");
return -1;
}
if (st.st_size <= sizeof(uidlist->hdr)) {
memset(&uidlist->hdr, 0, sizeof(uidlist->hdr));
uidlist->hdr.header_size = sizeof(uidlist->hdr);
uidlist->hdr.used_file_size = sizeof(uidlist->hdr);
return 0;
}
if (uidlist->mmap_base != NULL) {
if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
squat_uidlist_set_syscall_error(uidlist, "munmap()");
}
uidlist->mmap_size = st.st_size;
uidlist->mmap_base =
mmap(NULL, uidlist->mmap_size, PROT_READ | PROT_WRITE,
MAP_SHARED, uidlist->fd, 0);
if (uidlist->mmap_base == MAP_FAILED) {
uidlist->mmap_size = 0;
uidlist->mmap_base = NULL;
squat_uidlist_set_syscall_error(uidlist, "mmap()");
return -1;
}
memcpy(&uidlist->hdr, uidlist->mmap_base, sizeof(uidlist->hdr));
// FIXME: verify header
if (uidlist->hdr.uids_expunged)
uidlist->check_expunges = TRUE;
uidlist->first_new_list_idx = uidlist->mmap_size;
return 1;
}
static int squat_uidlist_open(struct squat_uidlist *uidlist)
{
i_assert(uidlist->fd == -1);
uidlist->fd = open(uidlist->filepath, O_RDWR | O_CREAT, 0600);
if (uidlist->fd == -1) {
squat_uidlist_set_syscall_error(uidlist, "open()");
return -1;
}
return squat_uidlist_map(uidlist);
}
static void squat_uidlist_close(struct squat_uidlist *uidlist)
{
if (uidlist->mmap_base != NULL) {
if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
squat_uidlist_set_syscall_error(uidlist, "munmap()");
uidlist->mmap_base = NULL;
}
uidlist->mmap_size = 0;
if (uidlist->fd != -1) {
if (close(uidlist->fd) < 0)
squat_uidlist_set_syscall_error(uidlist, "close()");
uidlist->fd = -1;
}
}
struct squat_uidlist *
squat_uidlist_init(struct squat_trie *trie, const char *path)
{
struct squat_uidlist *uidlist;
uidlist = i_new(struct squat_uidlist, 1);
uidlist->trie = trie;
uidlist->filepath = i_strdup(path);
uidlist->fd = -1;
uidlist->first_new_list_idx = 1;
i_array_init(&uidlist->lists, 65536);
uidlist->node_pool =
pool_alloconly_create("squat uidlist node pool", 65536);
uidlist->tmp_buf = buffer_create_dynamic(default_pool, 16);
uidlist->list_buf = buffer_create_dynamic(default_pool, 256);
(void)squat_uidlist_open(uidlist);
return uidlist;
}
void squat_uidlist_deinit(struct squat_uidlist *uidlist)
{
squat_uidlist_close(uidlist);
pool_unref(uidlist->node_pool);
array_free(&uidlist->lists);
buffer_free(uidlist->tmp_buf);
buffer_free(uidlist->list_buf);
i_free(uidlist);
}
int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *_uid_list_idx,
uint32_t uid)
{
uint32_t uid_list_idx = *_uid_list_idx;
struct uid_node *node, *old_node;
i_assert(uid >= uidlist->hdr.uid_max);
if (uid_list_idx == 0) {
*_uid_list_idx = uid | UID_LIST_IDX_FLAG_SINGLE;
return 0;
}
if (uid > uidlist->hdr.uid_max) {
uidlist->hdr.uid_max = uid;
uidlist->hdr.uid_count++;
}
if (uid_list_idx < uidlist->first_new_list_idx) {
/* continue an existing list in the uidlist file */
old_node = POINTER_CAST((uid_list_idx << 1) |
UID_NODE_PREV_FLAG_OLD);
uid_list_idx = uidlist->first_new_list_idx +
array_count(&uidlist->lists);
node = array_append_space(&uidlist->lists);
uidlist->hdr.node_count++;
} else if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
uint32_t old_uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
if (uid == old_uid) {
/* trying to add the same uid again */
return 0;
}
/* convert single UID to a list */
old_node = p_new(uidlist->node_pool, struct uid_node, 1);
old_node->uid = old_uid;
uid_list_idx = uidlist->first_new_list_idx +
array_count(&uidlist->lists);
node = array_append_space(&uidlist->lists);
uidlist->hdr.node_count++;
} else {
/* update an in-memory list */
uint32_t arr_idx = uid_list_idx - uidlist->first_new_list_idx;
if (arr_idx >= array_count(&uidlist->lists)) {
/* broken */
squat_trie_set_corrupted(uidlist->trie,
"corrupted uidlist index (adding)");
return -1;
}
node = array_idx_modifiable(&uidlist->lists, arr_idx);
if (node->uid == uid) {
/* trying to add the same uid again */
return 0;
}
old_node = p_new(uidlist->node_pool, struct uid_node, 1);
*old_node = *node;
}
node->prev = old_node;
node->uid = uid;
*_uid_list_idx = uid_list_idx;
return 0;
}
static int
squat_uidlist_copy_existing(struct squat_uidlist *uidlist, size_t offset,
uint32_t *prev_uid_r, uint32_t *written_uid_r)
{
const uint8_t *data, *data_start, *end, *p = NULL;
uint32_t size, num, prev_uid, next_uid;
if (offset >= uidlist->mmap_size)
return -1;
data = CONST_PTR_OFFSET(uidlist->mmap_base, offset);
end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);
size = _squat_trie_unpack_num(&data, end);
if (data + size > end)
return -1;
data_start = data;
end = data + size;
prev_uid = next_uid = _squat_trie_unpack_num(&data, end);
p = data;
while (data != end) {
num = _squat_trie_unpack_num(&data, end);
next_uid = prev_uid + (num >> 1) + 1;
if ((num & 1) != 0) {
/* prev_uid..next_uid */
if (data == end) {
/* try to increase this range */
break;
}
/* beginning a new uid/range */
num = _squat_trie_unpack_num(&data, end);
next_uid += num + 1;
prev_uid = next_uid;
p = data;
}
prev_uid = next_uid;
p = data;
}
*written_uid_r = prev_uid;
*prev_uid_r = next_uid;
uidlist->hdr.deleted_space +=
(end - (const uint8_t *)uidlist->mmap_base) - offset;
buffer_append(uidlist->list_buf, data_start, p - data_start);
return 0;
}
static int
squat_uidlist_write_range(struct squat_uidlist *uidlist,
const struct uid_node *node,
uint32_t *prev_uid_r, uint32_t *written_uid_r,
int level)
{
buffer_t *buffer = uidlist->list_buf;
uint32_t written_uid, prev_uid;
uint32_t prev_idx = POINTER_CAST_TO(node->prev, uint32_t);
*prev_uid_r = node->uid;
if (node->prev == NULL) {
/* first UID */
_squat_trie_pack_num(buffer, node->uid);
} else {
if ((prev_idx & UID_NODE_PREV_FLAG_OLD) != 0) {
prev_idx >>= 1;
if (squat_uidlist_copy_existing(uidlist, prev_idx,
&prev_uid,
&written_uid) < 0 ||
prev_uid >= node->uid) {
squat_trie_set_corrupted(uidlist->trie,
"corrupted continued uidlist index");
return -1;
}
} else {
if (squat_uidlist_write_range(uidlist, node->prev,
&prev_uid, &written_uid,
level+1) < 0)
return -1;
}
/* prev_uid contains the previous node's UID.
written_uid contains the last written UID. */
if (prev_uid + 1 == node->uid) {
if (level != 0) {
/* this node continue the range */
*written_uid_r = written_uid;
return 0;
} else {
/* finishing range */
_squat_trie_pack_num(buffer, 1 |
((node->uid - written_uid - 1) << 1));
return 0;
}
}
i_assert(prev_uid < node->uid);
if (written_uid != prev_uid) {
i_assert(written_uid < prev_uid);
/* range ends at prev_uid */
_squat_trie_pack_num(buffer, 1 |
((prev_uid - written_uid - 1) << 1));
/* next uid/range */
_squat_trie_pack_num(buffer, node->uid - prev_uid - 1);
} else {
/* no range */
_squat_trie_pack_num(buffer,
((node->uid - prev_uid - 1) << 1));
}
}
*written_uid_r = node->uid;
return 0;
}
static void squat_uidlist_write_init(struct squat_uidlist *uidlist)
{
i_assert(uidlist->output == NULL);
uidlist->output = o_stream_create_file(uidlist->fd, default_pool,
0, FALSE);
if (uidlist->hdr.used_file_size < sizeof(uidlist->hdr)) {
/* creating a new file, write a dummy header */
o_stream_seek(uidlist->output, 0);
o_stream_send(uidlist->output, &uidlist->hdr,
sizeof(uidlist->hdr));
} else {
o_stream_seek(uidlist->output,
uidlist->hdr.used_file_size);
}
}
static int squat_uidlist_write_listbuf(struct squat_uidlist *uidlist,
struct ostream *output)
{
/* write size + buffer */
buffer_set_used_size(uidlist->tmp_buf, 0);
_squat_trie_pack_num(uidlist->tmp_buf, uidlist->list_buf->used);
if (o_stream_send(output, uidlist->tmp_buf->data,
uidlist->tmp_buf->used) < 0 ||
o_stream_send(output, uidlist->list_buf->data,
uidlist->list_buf->used) < 0) {
return -1;
}
return 0;
}
int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
uint32_t *_uid_list_idx)
{
uint32_t uid_list_idx = *_uid_list_idx;
const struct uid_node *node;
uint32_t prev_uid, written_uid;
if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
/* this is a single UID "list" */
return 0;
}
if (uid_list_idx < uidlist->first_new_list_idx) {
/* the list hasn't changed */
return 0;
}
uid_list_idx -= uidlist->first_new_list_idx;
if (uid_list_idx >= array_count(&uidlist->lists)) {
/* broken */
squat_trie_set_corrupted(uidlist->trie,
"corrupted uidlist index (finishing)");
return -1;
}
/* write the uidlist into a buffer */
node = array_idx(&uidlist->lists, uid_list_idx);
buffer_set_used_size(uidlist->list_buf, 0);
if (squat_uidlist_write_range(uidlist, node,
&prev_uid, &written_uid, 0) < 0) {
uidlist->write_failed = TRUE;
return -1;
}
if (uidlist->output == NULL)
squat_uidlist_write_init(uidlist);
/* new uidlist index is the offset in uidlist file */
*_uid_list_idx = uidlist->output->offset;
if (squat_uidlist_write_listbuf(uidlist, uidlist->output) < 0)
uidlist->write_failed = TRUE;
return 0;
}
static void squat_uidlist_write_header(struct squat_uidlist *uidlist)
{
uidlist->hdr.used_file_size = uidlist->output->offset;
o_stream_seek(uidlist->output, 0);
o_stream_send(uidlist->output, &uidlist->hdr, sizeof(uidlist->hdr));
}
int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity)
{
int ret = uidlist->write_failed ? -1 : 0;
if (uidlist->output != NULL) {
if (ret == 0) {
uidlist->hdr.uidvalidity = uid_validity;
squat_uidlist_write_header(uidlist);
}
o_stream_destroy(&uidlist->output);
}
array_clear(&uidlist->lists);
p_clear(uidlist->node_pool);
uidlist->write_failed = FALSE;
(void)squat_uidlist_map(uidlist);
return ret;
}
bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
unsigned int current_message_count)
{
uint32_t max_del_space, max_uid_del_count;
if (uidlist->hdr.used_file_size >= UIDLIST_COMPRESS_MIN_SIZE) {
/* see if we've reached the max. deleted space in file */
max_del_space = uidlist->hdr.used_file_size / 100 *
UIDLIST_COMPRESS_PERCENTAGE;
if (uidlist->hdr.deleted_space > max_del_space)
return TRUE;
}
if (uidlist->hdr.uid_count > current_message_count) {
if (current_message_count == 0)
return TRUE;
max_uid_del_count = uidlist->hdr.uid_count *
UIDLIST_UID_COMPRESS_PERCENTAGE / 100;
if ((uidlist->hdr.uid_count - current_message_count) >
max_uid_del_count)
return TRUE;
}
return FALSE;
}
int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
bool update_disk)
{
uidlist->check_expunges = TRUE;
if (update_disk) {
uidlist->hdr.uids_expunged = TRUE;
// FIXME: make sure uidlist.hdr is in updated state
if (pwrite_full(uidlist->fd, &uidlist->hdr,
sizeof(uidlist->hdr), 0) < 0) {
squat_uidlist_set_syscall_error(uidlist,
"pwrite_full()");
return -1;
}
}
return 0;
}
struct squat_uidlist_compress_ctx *
squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
const ARRAY_TYPE(seq_range) *existing_uids)
{
struct squat_uidlist_compress_ctx *ctx;
int fd;
ctx = i_new(struct squat_uidlist_compress_ctx, 1);
ctx->uidlist = uidlist;
ctx->tmp_path = i_strconcat(uidlist->filepath, ".tmp", NULL);
if (existing_uids != NULL) {
ctx->node_pool = pool_alloconly_create("compress node pool",
1024);
ctx->existing_uids = existing_uids;
i_array_init(&ctx->seen_uids,
I_MIN(128, array_count(existing_uids)));
}
fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
if (fd == -1) {
ctx->failed = TRUE;
i_error("open(%s) failed: %m", ctx->tmp_path);
} else {
ctx->output = o_stream_create_file(fd, default_pool, 0, TRUE);
o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr));
}
return ctx;
}
static bool
squat_uidlist_is_expunged(struct squat_uidlist_compress_ctx *ctx, uint32_t uid)
{
if (ctx->existing_uids == NULL)
return FALSE;
return !seq_range_exists(ctx->existing_uids, uid);
}
static void
squat_uidlist_compress_add_uid(struct squat_uidlist_compress_ctx *ctx,
uint32_t uid)
{
struct uid_node *node;
if (squat_uidlist_is_expunged(ctx, uid)) {
ctx->seen_expunged = TRUE;
return;
}
if (!seq_range_exists(&ctx->seen_uids, uid)) {
if (uid > ctx->hdr.uid_max)
ctx->hdr.uid_max = uid;
ctx->hdr.uid_count++;
seq_range_array_add(&ctx->seen_uids, 0, uid);
}
node = p_new(ctx->node_pool, struct uid_node, 1);
node->prev = ctx->last_node;
node->uid = uid;
ctx->last_node = node;
}
static int
squat_uidlist_remove_expunged(struct squat_uidlist_compress_ctx *ctx,
const uint8_t *data, size_t size,
bool *all_expunged_r)
{
const uint8_t *end;
uint32_t num, prev_uid, next_uid, written_uid;
end = data + size;
p_clear(ctx->node_pool);
ctx->seen_expunged = FALSE;
ctx->last_node = NULL;
prev_uid = _squat_trie_unpack_num(&data, end);
squat_uidlist_compress_add_uid(ctx, prev_uid);
while (data != end) {
num = _squat_trie_unpack_num(&data, end);
next_uid = prev_uid + (num >> 1) + 1;
if ((num & 1) != 0) {
for (prev_uid++; prev_uid <= next_uid; prev_uid++)
squat_uidlist_compress_add_uid(ctx, prev_uid);
if (data == end)
break;
num = _squat_trie_unpack_num(&data, end);
next_uid += num + 1;
}
squat_uidlist_compress_add_uid(ctx, next_uid);
prev_uid = next_uid;
}
if (!ctx->seen_expunged) {
/* no changes */
return 0;
}
if (ctx->last_node == NULL) {
/* everything expunged */
*all_expunged_r = TRUE;
return 1;
}
/* recreate the list and write it */
buffer_set_used_size(ctx->uidlist->list_buf, 0);
if (squat_uidlist_write_range(ctx->uidlist, ctx->last_node,
&prev_uid, &written_uid, 0) < 0)
return -1;
if (squat_uidlist_write_listbuf(ctx->uidlist, ctx->output) < 0)
return -1;
*all_expunged_r = FALSE;
return 1;
}
int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
uint32_t *uid_list_idx)
{
struct squat_uidlist *uidlist = ctx->uidlist;
const uint8_t *data, *p, *end;
uint32_t size;
int ret;
if ((*uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
uint32_t uid = *uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
if (ctx->uidlist->check_expunges) {
if (squat_uidlist_is_expunged(ctx, uid))
return 0;
}
return 1;
}
if (ctx->output == NULL)
return -1;
if (*uid_list_idx >= uidlist->mmap_size) {
squat_trie_set_corrupted(uidlist->trie,
"uidlist index points outside file (compressing)");
return -1;
}
data = p = CONST_PTR_OFFSET(uidlist->mmap_base, *uid_list_idx);
end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);
size = _squat_trie_unpack_num(&p, end);
if (data + size > end) {
squat_trie_set_corrupted(uidlist->trie,
"corrupted uidlist index (compressing)");
return -1;
}
*uid_list_idx = ctx->output->offset;
if (!ctx->uidlist->check_expunges)
ret = 0;
else {
bool all_expunged;
ret = squat_uidlist_remove_expunged(ctx, p, size,
&all_expunged);
if (ret < 0) {
ctx->failed = TRUE;
return -1;
}
if (ret > 0 && all_expunged)
return 0;
}
if (ret == 0) {
if (o_stream_send(ctx->output, data, p - data + size) < 0) {
ctx->failed = TRUE;
return -1;
}
}
ctx->hdr.node_count++;
return 1;
}
void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **_ctx)
{
struct squat_uidlist_compress_ctx *ctx = *_ctx;
*_ctx = NULL;
if (ctx->node_pool != NULL)
pool_unref(ctx->node_pool);
if (array_is_created(&ctx->seen_uids))
array_free(&ctx->seen_uids);
if (ctx->output != NULL) {
if (ctx->failed)
(void)unlink(ctx->tmp_path);
o_stream_destroy(&ctx->output);
}
i_free(ctx->tmp_path);
i_free(ctx);
}
int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **_ctx)
{
struct squat_uidlist_compress_ctx *ctx = *_ctx;
int ret = 0;
if (ctx->failed) {
squat_uidlist_compress_rollback(_ctx);
return -1;
}
/* write the header */
ctx->hdr.uidvalidity = ctx->uidlist->hdr.uidvalidity;
ctx->hdr.header_size = sizeof(ctx->hdr);
ctx->hdr.used_file_size = ctx->output->offset;
if (ctx->existing_uids == NULL) {
ctx->hdr.uid_max = ctx->uidlist->hdr.uid_max;
ctx->hdr.uid_count = ctx->uidlist->hdr.uid_count;
}
o_stream_seek(ctx->output, 0);
if (o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr)) < 0)
ret = -1;
if (ret == 0) {
if (rename(ctx->tmp_path, ctx->uidlist->filepath) < 0) {
i_error("rename(%s, %s) failed: %m",
ctx->tmp_path, ctx->uidlist->filepath);
ret = -1;
} else {
/* reopen */
ctx->uidlist->check_expunges = FALSE;
squat_uidlist_close(ctx->uidlist);
(void)squat_uidlist_open(ctx->uidlist);
}
}
if (ret < 0)
ctx->failed = TRUE;
squat_uidlist_compress_rollback(_ctx);
return ret;
}
static void
squat_uidlist_get_add_uid(struct squat_uidlist_get_context *ctx, uint32_t uid)
{
if (ctx->filter_pos == 0) {
seq_range_array_add(ctx->result, 0, uid);
return;
}
for (; ctx->filter_pos < uid; ctx->filter_pos++)
seq_range_array_remove(ctx->result, ctx->filter_pos);
ctx->filter_pos++;
}
static int
squat_uidlist_get_range_list(struct squat_uidlist_get_context *ctx,
size_t offset)
{
const uint8_t *data, *end;
uint32_t size, num, prev_uid, next_uid;
if (offset >= ctx->uidlist->mmap_size)
return -1;
data = CONST_PTR_OFFSET(ctx->uidlist->mmap_base, offset);
end = CONST_PTR_OFFSET(ctx->uidlist->mmap_base,
ctx->uidlist->mmap_size);
size = _squat_trie_unpack_num(&data, end);
if (data + size > end)
return -1;
end = data + size;
prev_uid = _squat_trie_unpack_num(&data, end);
squat_uidlist_get_add_uid(ctx, prev_uid);
while (data != end) {
num = _squat_trie_unpack_num(&data, end);
next_uid = prev_uid + (num >> 1) + 1;
if ((num & 1) != 0) {
for (prev_uid++; prev_uid <= next_uid; prev_uid++)
squat_uidlist_get_add_uid(ctx, prev_uid);
if (data == end)
break;
num = _squat_trie_unpack_num(&data, end);
next_uid += num + 1;
}
squat_uidlist_get_add_uid(ctx, next_uid);
prev_uid = next_uid;
}
return 0;
}
static int
squat_uidlist_get_ctx(struct squat_uidlist_get_context *ctx,
uint32_t uid_list_idx)
{
if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
uint32_t uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
squat_uidlist_get_add_uid(ctx, uid);
return 0;
}
return squat_uidlist_get_range_list(ctx, uid_list_idx);
}
int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
ARRAY_TYPE(seq_range) *result)
{
struct squat_uidlist_get_context ctx;
memset(&ctx, 0, sizeof(ctx));
ctx.uidlist = uidlist;
ctx.result = result;
return squat_uidlist_get_ctx(&ctx, uid_list_idx);
}
int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
ARRAY_TYPE(seq_range) *result)
{
struct squat_uidlist_get_context ctx;
const struct seq_range *range;
unsigned int count;
memset(&ctx, 0, sizeof(ctx));
ctx.uidlist = uidlist;
ctx.result = result;
ctx.filter_pos = 1;
return squat_uidlist_get_ctx(&ctx, uid_list_idx);
range = array_get(ctx.result, &count);
if (count > 0) {
for (; ctx.filter_pos <= range[count-1].seq2; ctx.filter_pos++)
seq_range_array_remove(result, ctx.filter_pos);
}
}
size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
unsigned int *count_r)
{
*count_r = uidlist->hdr.node_count;
return uidlist->hdr.used_file_size;
}
--- NEW FILE: squat-uidlist.h ---
#ifndef __SQUAT_UIDLIST_H
#define __SQUAT_UIDLIST_H
#include "seq-range-array.h"
struct squat_trie;
struct squat_uidlist;
struct squat_uidlist *
squat_uidlist_init(struct squat_trie *trie, const char *path);
void squat_uidlist_deinit(struct squat_uidlist *uidlist);
/* Add new UID to given UID list. The uid_list_idx is updated to contain the
new list index. It must be put through _finish_list() before it's actually
written to disk. */
int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *uid_list_idx,
uint32_t uid);
/* Write UID list into disk. The uid_list_idx is updated to contain the new
permanent index for it. */
int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
uint32_t *uid_list_idx);
int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity);
/* Returns TRUE if uidlist should be compressed. current_message_count can be
(unsigned int)-1 if you don't want include it in the check. */
bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
unsigned int current_message_count);
/* Mark the uidlist containing expunged messages. update_disk=FALSE should be
done when the uidlist is going to be compressed and this function only tells
the compression to check for the expunged messages. */
int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
bool update_disk);
/* Compress the uidlist file. existing_uids may be NULL if they're not known. */
struct squat_uidlist_compress_ctx *
squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
const ARRAY_TYPE(seq_range) *existing_uids);
int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
uint32_t *uid_list_idx);
void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **ctx);
int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **ctx);
/* Returns UIDs for a given UID list index. */
int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
ARRAY_TYPE(seq_range) *result);
/* Filter out UIDs which don't appear in the given UID list from the given
result array */
int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
ARRAY_TYPE(seq_range) *result);
size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
unsigned int *count_r);
#endif
More information about the dovecot-cvs
mailing list