[dovecot-cvs] dovecot/src/plugins/fts .cvsignore, NONE, 1.1 Makefile.am, NONE, 1.1 fts-api-private.h, NONE, 1.1 fts-api.c, NONE, 1.1 fts-api.h, NONE, 1.1 fts-plugin.c, NONE, 1.1 fts-plugin.h, NONE, 1.1 fts-storage.c, NONE, 1.1
cras at dovecot.org
cras at dovecot.org
Sun Sep 17 19:28:14 EEST 2006
Update of /var/lib/cvs/dovecot/src/plugins/fts
In directory talvi:/tmp/cvs-serv5772/src/plugins/fts
Added Files:
.cvsignore Makefile.am fts-api-private.h fts-api.c fts-api.h
fts-plugin.c fts-plugin.h fts-storage.c
Log Message:
Added full text search plugin framework. Still missing support for handling
expunges. Also it could use better logic with more complex SEARCH queries to
figure out when the FTS index should be used.
--- NEW FILE: .cvsignore ---
*.la
*.lo
*.o
.deps
.libs
Makefile
Makefile.in
so_locations
--- NEW FILE: Makefile.am ---
AM_CPPFLAGS = \
-I$(top_srcdir)/src/lib \
-I$(top_srcdir)/src/lib-mail \
-I$(top_srcdir)/src/lib-storage
lib01_fts_plugin_la_LDFLAGS = -module -avoid-version
module_LTLIBRARIES = \
lib01_fts_plugin.la
lib01_fts_plugin_la_SOURCES = \
fts-api.c \
fts-plugin.c \
fts-storage.c
noinst_HEADERS = \
fts-api.h \
fts-api-private.h \
fts-plugin.h
# For each of the imap and lda subdirectories of $(moduledir): create the
# directory, remove any existing lib01_fts_plugin.so entry in it, and create a
# symlink there pointing at ../lib01_fts_plugin.so (the copy one level up).
install-exec-local:
for d in imap lda; do \
$(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
rm -f $(DESTDIR)$(moduledir)/$$d/lib01_fts_plugin.so; \
$(LN_S) ../lib01_fts_plugin.so $(DESTDIR)$(moduledir)/$$d; \
done
--- NEW FILE: fts-api-private.h ---
#ifndef __FTS_API_PRIVATE_H
#define __FTS_API_PRIVATE_H
#include "fts-api.h"
/* Operations implemented by an FTS backend. The public fts_backend_*()
   functions forward to these function pointers. */
struct fts_backend_vfuncs {
/* Create a backend instance. The FTS plugin passes the path of the
   mailbox's FTS index file. */
struct fts_backend *(*init)(const char *path);
/* Destroy an instance returned by init(). */
void (*deinit)(struct fts_backend *backend);
/* Begin adding data to the index. The caller reads *last_uid_r afterwards
   and indexes only messages with a higher UID. */
struct fts_backend_build_context *
(*build_init)(struct fts_backend *backend,
uint32_t *last_uid_r);
/* Add size bytes of data belonging to message uid. Callers treat a
   negative return value as failure. */
int (*build_more)(struct fts_backend_build_context *ctx, uint32_t uid,
const void *data, size_t size);
/* Finish the build started by build_init(). Callers treat a negative
   return value as failure. */
int (*build_deinit)(struct fts_backend_build_context *ctx);
/* Fill result with the matches for key. The FTS plugin interprets the
   ranges as UIDs. A negative return value means the lookup failed. */
int (*lookup)(struct fts_backend *backend, const char *key,
ARRAY_TYPE(seq_range) *result);
/* Called with a result that lookup() has already filled, once per
   additional key. Presumably removes entries that don't match key -
   TODO confirm against a backend implementation. A negative return value
   means failure. */
int (*filter)(struct fts_backend *backend, const char *key,
ARRAY_TYPE(seq_range) *result);
};
/* A backend as seen by the FTS API: its registered name and its operations. */
struct fts_backend {
/* Name that fts_backend_init() and fts_backend_unregister() compare
   against with strcmp(). */
const char *name;
/* Backend implementation functions. */
struct fts_backend_vfuncs v;
};
/* State of an index build in progress, returned by build_init(). */
struct fts_backend_build_context {
/* Backend whose build_more()/build_deinit() are called for this context. */
struct fts_backend *backend;
};
/* Make a backend available to fts_backend_init() under backend->name. Only
   the pointer is stored. */
void fts_backend_register(const struct fts_backend *backend);
/* Remove the backend registered under the given name. Panics if no such
   backend is registered. */
void fts_backend_unregister(const char *name);
#endif
--- NEW FILE: fts-api.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "fts-api-private.h"
/* Registered backends. Created by the first fts_backend_register() call and
   freed again when the last backend is unregistered. */
static ARRAY_DEFINE(backends, const struct fts_backend *);
void fts_backend_register(const struct fts_backend *backend)
{
if (!array_is_created(&backends)) {
ARRAY_CREATE(&backends, default_pool,
const struct fts_backend *, 4);
}
array_append(&backends, &backend, 1);
}
/* Remove the backend registered under the given name. Panics if the name is
   not registered. The backend list is freed once its last entry is removed. */
void fts_backend_unregister(const char *name)
{
	const struct fts_backend *const *list;
	unsigned int idx, total;

	list = array_get(&backends, &total);

	/* find the first entry with a matching name */
	idx = 0;
	while (idx < total && strcmp(list[idx]->name, name) != 0)
		idx++;

	if (idx < total)
		array_delete(&backends, idx, 1);
	else
		i_panic("fts_backend_unregister(%s): unknown backend", name);

	/* total is the count from before the deletion, so 1 means the list
	   is now empty */
	if (total == 1)
		array_free(&backends);
}
/* Create an instance of the backend registered as backend_name, passing path
   to its init() function.

   Returns the backend's init() result, or NULL (after logging an error) if
   no backend with that name is registered. */
struct fts_backend *
fts_backend_init(const char *backend_name, const char *path)
{
	const struct fts_backend *const *be;
	unsigned int i, count;

	/* The array doesn't exist before the first registration and after the
	   last unregistration, and must not be accessed in that state. */
	if (array_is_created(&backends)) {
		be = array_get(&backends, &count);
		for (i = 0; i < count; i++) {
			if (strcmp(be[i]->name, backend_name) == 0)
				return be[i]->v.init(path);
		}
	}

	i_error("Unknown FTS backend: %s", backend_name);
	return NULL;
}
/* Destroy a backend instance by calling its deinit() function. */
void fts_backend_deinit(struct fts_backend *backend)
{
	/* no "return <expr>" here: ISO C doesn't allow it in a void function */
	backend->v.deinit(backend);
}
struct fts_backend_build_context *
fts_backend_build_init(struct fts_backend *backend, uint32_t *last_uid_r)
{
return backend->v.build_init(backend, last_uid_r);
}
int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
const void *data, size_t size)
{
return ctx->backend->v.build_more(ctx, uid, data, size);
}
int fts_backend_build_deinit(struct fts_backend_build_context *ctx)
{
return ctx->backend->v.build_deinit(ctx);
}
/* Ask the backend for the matches of key. Returns whatever the backend's
   lookup() returns. */
int fts_backend_lookup(struct fts_backend *backend, const char *key,
		       ARRAY_TYPE(seq_range) *result)
{
	const struct fts_backend_vfuncs *vfuncs = &backend->v;

	return vfuncs->lookup(backend, key, result);
}
/* Pass an existing result and a further key to the backend's filter().
   Returns whatever filter() returns. */
int fts_backend_filter(struct fts_backend *backend, const char *key,
		       ARRAY_TYPE(seq_range) *result)
{
	const struct fts_backend_vfuncs *vfuncs = &backend->v;

	return vfuncs->filter(backend, key, result);
}
--- NEW FILE: fts-api.h ---
#ifndef __FTS_API_H
#define __FTS_API_H
#include "seq-range-array.h"
/* Create an instance of the backend registered as backend_name, passing path
   to it. Logs an error and returns NULL if the name is unknown. */
struct fts_backend *
fts_backend_init(const char *backend_name, const char *path);
/* Destroy an instance returned by fts_backend_init(). */
void fts_backend_deinit(struct fts_backend *backend);
/* Begin an index build. The backend fills in *last_uid_r; the FTS plugin
   then indexes only messages with a higher UID. */
struct fts_backend_build_context *
fts_backend_build_init(struct fts_backend *backend, uint32_t *last_uid_r);
/* Add size bytes of data for message uid to the index being built. The FTS
   plugin treats a negative return value as failure. */
int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
const void *data, size_t size);
/* Finish the index build. The FTS plugin treats a negative return value as
   failure. */
int fts_backend_build_deinit(struct fts_backend_build_context *ctx);
/* Fill result with the matches for key. The FTS plugin interprets the ranges
   as UIDs and treats a negative return value as failure. */
int fts_backend_lookup(struct fts_backend *backend, const char *key,
ARRAY_TYPE(seq_range) *result);
/* Apply a further key to a result already filled by fts_backend_lookup().
   Presumably narrows the result - TODO confirm against a backend. The FTS
   plugin treats a negative return value as failure. */
int fts_backend_filter(struct fts_backend *backend, const char *key,
ARRAY_TYPE(seq_range) *result);
#endif
--- NEW FILE: fts-plugin.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "mail-storage-private.h"
#include "fts-plugin.h"
/* Value hook_mailbox_opened had when fts_plugin_init() ran. fts_mailbox_opened()
   calls it first if it isn't NULL. */
void (*fts_next_hook_mailbox_opened)(struct mailbox *box);
/* Install fts_mailbox_opened() as the mailbox-opened hook and remember the
   hook that was installed before it. */
void fts_plugin_init(void)
{
	void (*previous_hook)(struct mailbox *box) = hook_mailbox_opened;

	hook_mailbox_opened = fts_mailbox_opened;
	fts_next_hook_mailbox_opened = previous_hook;
}
/* Restore the previously installed mailbox-opened hook. Nothing is changed
   if the current hook is no longer fts_mailbox_opened(). */
void fts_plugin_deinit(void)
{
	if (hook_mailbox_opened != fts_mailbox_opened)
		return;

	hook_mailbox_opened = fts_next_hook_mailbox_opened;
}
--- NEW FILE: fts-plugin.h ---
#ifndef __FTS_PLUGIN_H
#define __FTS_PLUGIN_H
/* Hook that was installed before the FTS plugin's own; may be NULL. */
extern void (*fts_next_hook_mailbox_opened)(struct mailbox *box);
/* Mailbox-opened hook: calls the previous hook, then attaches FTS to the
   mailbox if the FTS environment variable names a usable backend. */
void fts_mailbox_opened(struct mailbox *box);
/* Install fts_mailbox_opened() as the mailbox-opened hook. */
void fts_plugin_init(void);
/* Restore the previous mailbox-opened hook if ours is still the current one. */
void fts_plugin_deinit(void);
#endif
--- NEW FILE: fts-storage.c ---
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "istream.h"
#include "message-parser.h"
#include "message-decoder.h"
#include "mail-search.h"
#include "mail-storage-private.h"
#include "fts-api-private.h"
#include "fts-plugin.h"
#include <stdlib.h>
#define FTS_INDEX_NAME "dovecot.index.fts"
/* Fetch the pointer stored in obj->module_contexts at the FTS plugin's module
   index. Used on mailboxes (yields a struct fts_mailbox *) and on search
   contexts (yields a struct fts_search_context *). */
#define FTS_CONTEXT(obj) \
*((void **)array_idx_modifiable(&(obj)->module_contexts, \
fts_storage_module_id))
/* Per-mailbox FTS state, stored in the mailbox's module_contexts. */
struct fts_mailbox {
/* Copy of the mailbox's vfuncs taken before FTS replaced some of them;
   used to call the original implementations. */
struct mailbox_vfuncs super;
/* Backend instance opened for this mailbox. */
struct fts_backend *backend;
};
/* Per-search FTS state, stored in the search context's module_contexts. */
struct fts_search_context {
/* Sequence ranges produced from the backend's result. Left uncreated when
   FTS isn't restricting this search. */
ARRAY_TYPE(seq_range) result;
/* Index of the range in result that is currently being walked. */
unsigned int result_pos;
};
/* Index used for this plugin's entries in module_contexts arrays. Taken from
   mail_storage_module_id the first time a mailbox gets FTS attached. */
static unsigned int fts_storage_module_id = 0;
/* TRUE once fts_storage_module_id has been assigned. */
static bool fts_storage_module_id_set = FALSE;
/* close() replacement: shut down the mailbox's FTS backend, call the original
   close() and release the FTS state.

   Returns the original close()'s return value. */
static int fts_mailbox_close(struct mailbox *box)
{
	struct fts_mailbox *fbox = FTS_CONTEXT(box);
	int ret;

	fts_backend_deinit(fbox->backend);

	/* The original close() pointer lives inside fbox, so it has to be
	   called before fbox is freed. */
	ret = fbox->super.close(box);
	i_free(fbox);
	return ret;
}
/* Convert an array of UID ranges into a newly created array of sequence
   ranges, using mailbox_get_uids() for each entry. Entries for which the
   returned first sequence is 0 are dropped.

   Returns 0 on success. On failure returns -1 and frees seq_range. */
static int uid_range_to_seq(struct mailbox *box,
			    ARRAY_TYPE(seq_range) *uid_range,
			    ARRAY_TYPE(seq_range) *seq_range)
{
	const struct seq_range *uids;
	struct seq_range seqs;
	unsigned int idx, uid_count;

	uids = array_get(uid_range, &uid_count);
	ARRAY_CREATE(seq_range, default_pool, struct seq_range, uid_count);

	for (idx = 0; idx < uid_count; idx++) {
		const struct seq_range *cur = &uids[idx];

		if (mailbox_get_uids(box, cur->seq1, cur->seq2,
				     &seqs.seq1, &seqs.seq2) < 0) {
			array_free(seq_range);
			return -1;
		}

		/* nothing to add for this UID range */
		if (seqs.seq1 == 0)
			continue;
		array_append(seq_range, &seqs, 1);
	}
	return 0;
}
/* State used while feeding messages to the backend. */
struct fts_storage_build_context {
/* Backend build context that receives the data. */
struct fts_backend_build_context *build;
/* UID of the message currently being indexed. */
uint32_t uid;
/* Header lines collected for the current part that haven't been sent to
   the backend yet. */
string_t *headers;
/* TRUE once the current part's Content-Type has shown it to be a text or
   message/rfc822 part, i.e. its headers are to be indexed. */
bool save_part;
};
static int fts_build_mail_header(struct fts_storage_build_context *ctx,
const struct message_block *block)
{
const struct message_header_line *hdr = block->hdr;
/* hdr->full_value is always set because we get the block from
message_decoder */
str_append(ctx->headers, hdr->name);
str_append_n(ctx->headers, hdr->middle, hdr->middle_len);
str_append_n(ctx->headers, hdr->full_value, hdr->full_value_len);
if (!hdr->no_newline)
str_append_c(ctx->headers, '\n');
if (!ctx->save_part) {
if (strcasecmp(hdr->name, "Content-Type") == 0) {
/* we'll index only text/xxx and message/rfc822 parts
for now */
if ((block->part->flags &
(MESSAGE_PART_FLAG_TEXT |
MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0)
return 0;
ctx->save_part = TRUE;
}
return 1;
}
if (fts_backend_build_more(ctx->build, ctx->uid, str_data(ctx->headers),
str_len(ctx->headers)) < 0)
return -1;
str_truncate(ctx->headers, 0);
return 1;
}
/* Parse one message and feed its content to the backend.

   Header blocks go through fts_build_mail_header(), which may ask for the
   rest of the part to be skipped. Decoded body blocks are sent to the backend
   directly.

   Returns 0 on success. Returns -1 if the message stream can't be opened,
   the stream reports an error, or the backend fails. */
static int
fts_build_mail(struct fts_storage_build_context *ctx, struct mail *mail)
{
	struct istream *input;
	struct message_parser_ctx *parser;
	struct message_decoder_context *decoder;
	struct message_block raw_block, block;
	struct message_part *prev_part, *skip_part;
	int ret;

	ctx->uid = mail->uid;

	input = mail_get_stream(mail, NULL, NULL);
	if (input == NULL)
		return -1;

	prev_part = skip_part = NULL;
	parser = message_parser_init(pool_datastack_create(), input);
	decoder = message_decoder_init();
	for (;;) {
		ret = message_parser_parse_next_block(parser, &raw_block);
		i_assert(ret != 0);
		if (ret < 0) {
			/* no more blocks: success unless the stream failed */
			if (input->stream_errno == 0)
				ret = 0;
			break;
		}

		if (raw_block.part != prev_part) {
			/* A different part begins: reset the per-part state.
			   prev_part must be updated here, otherwise every
			   block looks like a new part and neither the header
			   collection nor the skipping below can work. */
			str_truncate(ctx->headers, 0);
			ctx->save_part = FALSE;
			skip_part = NULL;
			prev_part = raw_block.part;
		} else if (raw_block.part == skip_part)
			continue;

		if (!message_decoder_decode_next_block(decoder, &raw_block,
						       &block))
			continue;

		if (block.hdr != NULL) {
			ret = fts_build_mail_header(ctx, &block);
			if (ret < 0)
				break;
			if (ret == 0)
				skip_part = raw_block.part;
		} else {
			if (fts_backend_build_more(ctx->build, mail->uid,
						   block.data,
						   block.size) < 0) {
				ret = -1;
				break;
			}
		}
	}
	(void)message_parser_deinit(&parser);
	message_decoder_deinit(&decoder);
	return ret;
}
static int fts_build_new(struct mailbox_transaction_context *t)
{
struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
struct fts_storage_build_context ctx;
struct mail_search_context *search_ctx;
struct mail_search_seqset seqset;
struct mail_search_arg search_arg;
struct mail *mail;
uint32_t last_uid;
int ret = 0;
memset(&ctx, 0, sizeof(ctx));
ctx.build = fts_backend_build_init(fbox->backend, &last_uid);
memset(&seqset, 0, sizeof(seqset));
if (mailbox_get_uids(t->box, last_uid+1, (uint32_t)-1,
&seqset.seq1, &seqset.seq2) < 0) {
(void)fts_backend_build_deinit(ctx.build);
return -1;
}
if (seqset.seq1 == 0) {
/* no new messages */
(void)fts_backend_build_deinit(ctx.build);
return 0;
}
memset(&search_arg, 0, sizeof(search_arg));
search_arg.type = SEARCH_SEQSET;
search_arg.value.seqset = &seqset;
ctx.headers = str_new(default_pool, 512);
mail = mail_alloc(t, 0, NULL);
search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
while (mailbox_search_next(search_ctx, mail) > 0) {
if (fts_build_mail(&ctx, mail) < 0) {
ret = -1;
break;
}
}
if (mailbox_search_deinit(&search_ctx) < 0)
ret = -1;
mail_free(&mail);
if (fts_backend_build_deinit(ctx.build) < 0)
ret = -1;
str_free(&ctx.headers);
return ret;
}
static struct mail_search_context *
fts_mailbox_search_init(struct mailbox_transaction_context *t,
const char *charset, struct mail_search_arg *args,
const enum mail_sort_type *sort_program)
{
struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
struct mail_search_context *ctx;
struct fts_search_context *fctx;
ARRAY_TYPE(seq_range) uid_result;
ctx = fbox->super.search_init(t, charset, args, sort_program);
fctx = i_new(struct fts_search_context, 1);
array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx);
/* FIXME: handle AND/OR. Maybe also header lookups? */
while (args != NULL &&
args->type != SEARCH_BODY &&
args->type != SEARCH_TEXT)
args = args->next;
if (args != NULL) {
if (fts_build_new(t) < 0)
return ctx;
ARRAY_CREATE(&uid_result, default_pool, struct seq_range, 64);
if (fts_backend_lookup(fbox->backend, args->value.str,
&uid_result) < 0) {
/* failed, fallback to reading everything */
array_free(&uid_result);
}
args = args->next;
while (args != NULL) {
if (args->type == SEARCH_BODY ||
args->type == SEARCH_TEXT) {
if (fts_backend_filter(fbox->backend,
args->value.str,
&uid_result) < 0) {
/* failed, but we already have limited
the search, so just ignore this */
break;
}
}
args = args->next;
}
if (array_is_created(&uid_result)) {
(void)uid_range_to_seq(t->box, &uid_result,
&fctx->result);
array_free(&uid_result);
}
}
return ctx;
}
/* search_next_update_seq() replacement. When the search has an FTS result,
   ctx->seq is steered through the sequence ranges in fctx->result before the
   original implementation is called. Without an FTS result the original
   implementation is called directly.

   Returns 0 once all FTS ranges have been passed, otherwise the original
   implementation's return value. */
static int fts_mailbox_search_next_update_seq(struct mail_search_context *ctx)
{
struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
struct fts_search_context *fctx = FTS_CONTEXT(ctx);
struct seq_range *range;
unsigned int count;
if (array_is_created(&fctx->result)) {
range = array_get_modifiable(&fctx->result, &count);
/* skip the ranges that end before the current sequence */
while (fctx->result_pos < count &&
ctx->seq > range[fctx->result_pos].seq2)
fctx->result_pos++;
/* no ranges left: nothing more can match */
if (fctx->result_pos == count)
return 0;
/* NOTE(review): a range whose seq2 equals ctx->seq is not skipped by the
   loop above. Check whether that lets one sequence past the end of the
   range reach the original implementation. */
if (ctx->seq > range[fctx->result_pos].seq1)
range[fctx->result_pos].seq1 = ctx->seq+1;
else {
/* Jump to just before the start of the range and shrink the range from
   the front. Presumably the original implementation then advances
   ctx->seq by one - TODO confirm. */
ctx->seq = range[fctx->result_pos].seq1 - 1;
range[fctx->result_pos].seq1++;
}
}
return fbox->super.search_next_update_seq(ctx);
}
/* search_deinit() replacement: release the FTS search state, then call the
   original search_deinit() and return its result. */
static int fts_mailbox_search_deinit(struct mail_search_context *ctx)
{
	struct fts_search_context *search_state = FTS_CONTEXT(ctx);
	struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);

	/* the result array exists only if FTS restricted this search */
	if (array_is_created(&search_state->result))
		array_free(&search_state->result);
	i_free(search_state);

	return fbox->super.search_deinit(ctx);
}
/* Mailbox-opened hook. Calls the previously installed hook first. If the FTS
   environment variable is set, the mailbox has an index directory and the
   named backend can be initialized, FTS is then attached to the mailbox: its
   close and search vfuncs are replaced and the FTS state is stored in the
   mailbox's module_contexts. Otherwise the mailbox is left untouched. */
void fts_mailbox_opened(struct mailbox *box)
{
	struct fts_mailbox *fbox;
	struct fts_backend *backend;
	const char *backend_name, *index_dir, *index_path;

	if (fts_next_hook_mailbox_opened != NULL)
		fts_next_hook_mailbox_opened(box);

	/* the FTS environment variable names the backend to use */
	backend_name = getenv("FTS");
	if (backend_name == NULL)
		return;

	index_dir = mail_storage_get_mailbox_index_dir(box->storage,
						       box->name);
	if (index_dir == NULL)
		return;

	index_path = t_strconcat(index_dir, "/" FTS_INDEX_NAME, NULL);
	backend = fts_backend_init(backend_name, index_path);
	if (backend == NULL)
		return;

	/* reserve our module_contexts index the first time it's needed */
	if (!fts_storage_module_id_set) {
		fts_storage_module_id = mail_storage_module_id++;
		fts_storage_module_id_set = TRUE;
	}

	fbox = i_new(struct fts_mailbox, 1);
	fbox->backend = backend;
	/* keep the original vfuncs before overriding any of them */
	fbox->super = box->v;

	box->v.close = fts_mailbox_close;
	box->v.search_init = fts_mailbox_search_init;
	box->v.search_next_update_seq = fts_mailbox_search_next_update_seq;
	box->v.search_deinit = fts_mailbox_search_deinit;

	array_idx_set(&box->module_contexts, fts_storage_module_id, &fbox);
}
More information about the dovecot-cvs
mailing list