dovecot-2.0: fts-lucene: Fixes to work with CLucene v2.3.3.4 and new FTS API.
dovecot at dovecot.org
dovecot at dovecot.org
Tue May 31 15:38:11 EEST 2011
details: http://hg.dovecot.org/dovecot-2.0/rev/9ae30e5d6935
changeset: 12829:9ae30e5d6935
user: Timo Sirainen <tss at iki.fi>
date: Tue May 31 15:38:03 2011 +0300
description:
fts-lucene: Fixes to work with CLucene v2.3.3.4 and new FTS API.
It's still not recommended to actually use this.
diffstat:
src/plugins/fts-lucene/Makefile.am | 2 +-
src/plugins/fts-lucene/fts-backend-lucene.c | 71 ++++++++++---
src/plugins/fts-lucene/lucene-wrapper.cc | 149 ++++++++++++---------------
3 files changed, 121 insertions(+), 101 deletions(-)
diffs (truncated from 397 to 300 lines):
diff -r 7a7c22755b7a -r 9ae30e5d6935 src/plugins/fts-lucene/Makefile.am
--- a/src/plugins/fts-lucene/Makefile.am Tue May 31 15:36:22 2011 +0300
+++ b/src/plugins/fts-lucene/Makefile.am Tue May 31 15:38:03 2011 +0300
@@ -12,7 +12,7 @@
lib21_fts_lucene_plugin.la
lib21_fts_lucene_plugin_la_LIBADD = \
- -lclucene
+ -lclucene-shared -lclucene-core
lib21_fts_lucene_plugin_la_SOURCES = \
fts-lucene-plugin.c \
diff -r 7a7c22755b7a -r 9ae30e5d6935 src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c Tue May 31 15:36:22 2011 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c Tue May 31 15:38:03 2011 +0300
@@ -24,8 +24,13 @@
struct fts_backend backend;
struct lucene_mail_storage *lstorage;
struct mailbox *box;
+};
- uint32_t last_uid;
+struct lucene_fts_backend_build_context {
+ struct fts_backend_build_context ctx;
+
+ uint32_t uid;
+ bool hdr;
};
static MODULE_CONTEXT_DEFINE_INIT(fts_lucene_storage_module,
@@ -110,38 +115,69 @@
{
struct lucene_fts_backend *backend =
(struct lucene_fts_backend *)_backend;
- struct fts_backend_build_context *ctx;
+ struct lucene_fts_backend_build_context *ctx;
+ uint32_t last_uid;
fts_backend_select(backend);
if (lucene_index_build_init(backend->lstorage->index,
- &backend->last_uid) < 0)
+ &last_uid) < 0)
return -1;
- ctx = i_new(struct fts_backend_build_context, 1);
- ctx->backend = _backend;
+ ctx = i_new(struct lucene_fts_backend_build_context, 1);
+ ctx->ctx.backend = _backend;
+ ctx->uid = last_uid + 1;
- *last_uid_r = backend->last_uid;
- *ctx_r = ctx;
+ *last_uid_r = last_uid;
+ *ctx_r = &ctx->ctx;
return 0;
}
+static void
+fts_backend_lucene_build_hdr(struct fts_backend_build_context *_ctx,
+ uint32_t uid)
+{
+ struct lucene_fts_backend_build_context *ctx =
+ (struct lucene_fts_backend_build_context *)_ctx;
+
+ i_assert(uid >= ctx->uid);
+
+ ctx->uid = uid;
+ ctx->hdr = TRUE;
+}
+
+static bool
+fts_backend_lucene_build_body_begin(struct fts_backend_build_context *_ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition ATTR_UNUSED)
+{
+ struct lucene_fts_backend_build_context *ctx =
+ (struct lucene_fts_backend_build_context *)_ctx;
+
+ i_assert(uid >= ctx->uid);
+
+ if (!fts_backend_default_can_index(content_type))
+ return FALSE;
+
+ ctx->uid = uid;
+ ctx->hdr = FALSE;
+ return TRUE;
+}
+
static int
-fts_backend_lucene_build_more(struct fts_backend_build_context *ctx,
- uint32_t uid, const unsigned char *data,
- size_t size, bool headers)
+fts_backend_lucene_build_more(struct fts_backend_build_context *_ctx,
+ const unsigned char *data, size_t size)
{
+ struct lucene_fts_backend_build_context *ctx =
+ (struct lucene_fts_backend_build_context *)_ctx;
struct lucene_fts_backend *backend =
- (struct lucene_fts_backend *)ctx->backend;
+ (struct lucene_fts_backend *)_ctx->backend;
- if (ctx->failed)
+ if (_ctx->failed)
return -1;
- i_assert(uid >= backend->last_uid);
- backend->last_uid = uid;
-
i_assert(backend->lstorage->selected_box == backend->box);
return lucene_index_build_more(backend->lstorage->index,
- uid, data, size, headers);
+ ctx->uid, data, size, ctx->hdr);
}
static int
@@ -212,6 +248,9 @@
fts_backend_lucene_get_last_uid,
NULL,
fts_backend_lucene_build_init,
+ fts_backend_lucene_build_hdr,
+ fts_backend_lucene_build_body_begin,
+ NULL,
fts_backend_lucene_build_more,
fts_backend_lucene_build_deinit,
fts_backend_lucene_expunge,
diff -r 7a7c22755b7a -r 9ae30e5d6935 src/plugins/fts-lucene/lucene-wrapper.cc
--- a/src/plugins/fts-lucene/lucene-wrapper.cc Tue May 31 15:36:22 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc Tue May 31 15:38:03 2011 +0300
@@ -33,8 +33,7 @@
struct lucene_index {
char *path, *lock_path;
- char *mailbox_name;
- TCHAR *tmailbox_name;
+ wchar_t *mailbox_name;
time_t last_stale_check;
bool lock_error;
@@ -48,45 +47,6 @@
uint32_t prev_uid, last_uid;
};
-class RawTokenStream : public TokenStream {
- CL_NS(util)::Reader *reader;
-
-public:
- RawTokenStream(CL_NS(util)::Reader *reader) {
- this->reader = reader;
- };
-
- bool next(Token *token) {
- const TCHAR *data;
-
- int32_t len = this->reader->read(data);
- if (len <= 0)
- return false;
-
- token->set(data, 0, len);
- return true;
- }
-
- void close() { }
-};
-
-class DovecotAnalyzer : public standard::StandardAnalyzer {
-public:
- TokenStream *tokenStream(const TCHAR *fieldName,
- CL_NS(util)::Reader *reader) {
- /* Everything except body/headers should go as-is without any
- modifications. Isn't there any easier way to do this than
- to implement a whole new RawTokenStream?.. */
- if (fieldName != 0 &&
- wcscmp(fieldName, L"headers") != 0 &&
- wcscmp(fieldName, L"body") != 0)
- return _CLNEW RawTokenStream(reader);
-
- return standard::StandardAnalyzer::
- tokenStream(fieldName, reader);
- }
-};
-
static bool lucene_dir_scan(const char *dir, const char *skip_path,
time_t stale_stamp, bool unlink_staled)
{
@@ -174,7 +134,7 @@
index = i_new(struct lucene_index, 1);
index->path = i_strdup(path);
index->lock_path = i_strdup(lock_path);
- index->analyzer = _CLNEW DovecotAnalyzer();
+ index->analyzer = _CLNEW standard::StandardAnalyzer();
lucene_delete_stale_locks(index);
return index;
@@ -192,24 +152,54 @@
lucene_index_close(index);
_CLDELETE(index->analyzer);
i_free(index->mailbox_name);
- i_free(index->tmailbox_name);
i_free(index->path);
i_free(index->lock_path);
i_free(index);
}
+static void
+lucene_utf8_to_tchar(const char *src, wchar_t *dest, size_t destsize)
+{
+ ARRAY_TYPE(unichars) dest_arr;
+ buffer_t buf = { 0, 0 };
+
+ i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+
+ buffer_create_data(&buf, dest, sizeof(wchar_t) * destsize);
+ array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
+ if (uni_utf8_to_ucs4(src, &dest_arr) < 0)
+ i_unreached();
+ i_assert(array_count(&dest_arr)+1 == destsize);
+ dest[destsize-1] = 0;
+}
+
+static void
+lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
+ wchar_t *dest, size_t destsize)
+{
+ ARRAY_TYPE(unichars) dest_arr;
+ buffer_t buf = { 0, 0 };
+
+ i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+
+ buffer_create_data(&buf, dest, sizeof(wchar_t) * destsize);
+ array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
+ if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0)
+ i_unreached();
+ i_assert(array_count(&dest_arr)+1 == destsize);
+ dest[destsize-1] = 0;
+}
+
void lucene_index_select_mailbox(struct lucene_index *index,
const char *mailbox_name)
{
- size_t len;
+ size_t size;
i_free(index->mailbox_name);
- i_free(index->tmailbox_name);
- len = strlen(mailbox_name);
- index->mailbox_name = i_strdup(mailbox_name);
- index->tmailbox_name = i_new(TCHAR, len + 1);
- STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
+ size = uni_utf8_strlen_n(mailbox_name, (size_t)-1) + 1;
+ index->mailbox_name = i_new(wchar_t, size);
+ lucene_utf8_to_tchar(mailbox_name, index->mailbox_name, size);
}
static void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
@@ -265,7 +255,7 @@
const TCHAR *field_name, uint32_t *uid_r)
{
Field *field = doc->getField(field_name);
- TCHAR *uid = field == NULL ? NULL : field->stringValue();
+ const TCHAR *uid = field == NULL ? NULL : field->stringValue();
if (uid == NULL) {
i_error("lucene: Corrupted FTS index %s: No UID for document",
index->path);
@@ -298,7 +288,7 @@
if there are more than one, delete the smaller ones. this is normal
behavior because we can't update/delete documents in writer, so
we'll do it only in here.. */
- Term mailbox_term(_T("box"), index->tmailbox_name);
+ Term mailbox_term(_T("box"), index->mailbox_name);
Term last_uid_term(_T("last_uid"), _T("*"));
TermQuery mailbox_query(&mailbox_term);
WildcardQuery last_uid_query(&last_uid_term);
@@ -421,49 +411,43 @@
const unsigned char *data, size_t size,
bool headers)
{
- unsigned int len;
+ size_t destsize;
i_assert(uid > index->last_uid);
i_assert(size > 0);
- len = uni_utf8_strlen_n(data, size);
- wchar_t dest[len+1];
- lucene_utf8towcs(dest, (const char *)data, len);
- dest[len] = 0;
+ destsize = uni_utf8_strlen_n(data, size) + 1;
+ wchar_t dest[destsize];
+ lucene_utf8_n_to_tchar(data, size, dest, destsize);
if (uid != index->prev_uid) {
- char id[MAX_INT_STRLEN];
- TCHAR tid[MAX_INT_STRLEN];
+ wchar_t id[MAX_INT_STRLEN];
if (lucene_index_build_flush(index) < 0)
return -1;
index->prev_uid = uid;
More information about the dovecot-cvs
mailing list