[dovecot-cvs] dovecot/src/plugins/fts-lucene fts-backend-lucene.c, 1.1, 1.2 fts-lucene-plugin.c, 1.1, 1.2 fts-lucene-plugin.h, 1.1, 1.2 lucene-wrapper.cc, 1.2, 1.3 lucene-wrapper.h, 1.1, 1.2
tss at dovecot.org
tss at dovecot.org
Wed Oct 25 00:49:16 UTC 2006
- Previous message: [dovecot-cvs] dovecot/src/plugins/fts fts-api-private.h, 1.2, 1.3 fts-api.c, 1.3, 1.4 fts-api.h, 1.2, 1.3 fts-storage.c, 1.5, 1.6
- Next message: [dovecot-cvs] dovecot/src/plugins/fts-lucene lucene-wrapper.cc, 1.3, 1.4
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /var/lib/cvs/dovecot/src/plugins/fts-lucene
In directory talvi:/tmp/cvs-serv7414
Modified Files:
fts-backend-lucene.c fts-lucene-plugin.c fts-lucene-plugin.h
lucene-wrapper.cc lucene-wrapper.h
Log Message:
Create only a single index, in INBOX's index dir. Made several fixes.
However, it is still a bit buggy.
Index: fts-backend-lucene.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-backend-lucene.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-backend-lucene.c 17 Sep 2006 23:15:54 -0000 1.1
+++ fts-backend-lucene.c 24 Oct 2006 23:49:13 -0000 1.2
@@ -1,23 +1,64 @@
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
+#include "array.h"
+#include "mail-storage-private.h"
#include "lucene-wrapper.h"
#include "fts-lucene-plugin.h"
+#define LUCENE_INDEX_DIR_NAME "lucene-indexes"
+
+struct lucene_mail_storage {
+ struct lucene_index *index;
+ struct mailbox *selected_box;
+ int refcount;
+};
+
struct lucene_fts_backend {
struct fts_backend backend;
- struct lucene_index *index;
+ struct lucene_mail_storage *lstorage;
+ struct mailbox *box;
uint32_t last_uid;
};
-static struct fts_backend *fts_backend_lucene_init(const char *path)
+static void fts_backend_select(struct lucene_fts_backend *backend)
+{
+ if (backend->lstorage->selected_box != backend->box) {
+ lucene_index_select_mailbox(backend->lstorage->index,
+ mailbox_get_name(backend->box));
+ backend->lstorage->selected_box = backend->box;
+ }
+}
+
+static struct fts_backend *fts_backend_lucene_init(struct mailbox *box)
{
+ struct lucene_mail_storage *lstorage;
struct lucene_fts_backend *backend;
+ const char *path;
+
+ lstorage = LUCENE_CONTEXT(box->storage);
+ if (lstorage == NULL) {
+ path = mail_storage_get_mailbox_index_dir(box->storage,
+ "INBOX");
+ if (path == NULL) {
+ /* in-memory indexes */
+ return NULL;
+ }
+
+ path = t_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
+
+ lstorage = i_new(struct lucene_mail_storage, 1);
+ lstorage->index = lucene_index_init(path);
+ array_idx_set(&box->storage->module_contexts,
+ fts_lucene_storage_module_id, &lstorage);
+ }
+ lstorage->refcount++;
backend = i_new(struct lucene_fts_backend, 1);
backend->backend = fts_backend_lucene;
- backend->index = lucene_index_init(path);
+ backend->lstorage = lstorage;
+ backend->box = box;
return &backend->backend;
}
@@ -26,7 +67,12 @@
struct lucene_fts_backend *backend =
(struct lucene_fts_backend *)_backend;
- lucene_index_deinit(backend->index);
+ if (--backend->lstorage->refcount == 0) {
+ array_idx_clear(&backend->box->storage->module_contexts,
+ fts_lucene_storage_module_id);
+ lucene_index_deinit(backend->lstorage->index);
+ i_free(backend->lstorage);
+ }
i_free(backend);
}
@@ -37,9 +83,12 @@
(struct lucene_fts_backend *)_backend;
struct fts_backend_build_context *ctx;
+ fts_backend_select(backend);
+
ctx = i_new(struct fts_backend_build_context, 1);
ctx->backend = _backend;
- if (lucene_index_build_init(backend->index, &backend->last_uid) < 0)
+ if (lucene_index_build_init(backend->lstorage->index,
+ &backend->last_uid) < 0)
ctx->failed = TRUE;
*last_uid_r = backend->last_uid;
@@ -60,7 +109,9 @@
i_assert(uid >= backend->last_uid);
backend->last_uid = uid;
- return lucene_index_build_more(backend->index, uid, data, size);
+ i_assert(backend->lstorage->selected_box == backend->box);
+ return lucene_index_build_more(backend->lstorage->index,
+ uid, data, size);
}
static int
@@ -70,7 +121,8 @@
(struct lucene_fts_backend *)ctx->backend;
int ret = ctx->failed ? -1 : 0;
- lucene_index_build_deinit(backend->index);
+ i_assert(backend->lstorage->selected_box == backend->box);
+ lucene_index_build_deinit(backend->lstorage->index);
i_free(ctx);
return ret;
}
@@ -82,7 +134,8 @@
struct lucene_fts_backend *backend =
(struct lucene_fts_backend *)_backend;
- return lucene_index_lookup(backend->index, key, result);
+ fts_backend_select(backend);
+ return lucene_index_lookup(backend->lstorage->index, key, result);
}
static int
@@ -92,7 +145,8 @@
struct lucene_fts_backend *backend =
(struct lucene_fts_backend *)_backend;
- return lucene_index_filter(backend->index, key, result);
+ fts_backend_select(backend);
+ return lucene_index_filter(backend->lstorage->index, key, result);
}
struct fts_backend fts_backend_lucene = {
Index: fts-lucene-plugin.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-lucene-plugin.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-lucene-plugin.c 17 Sep 2006 23:15:54 -0000 1.1
+++ fts-lucene-plugin.c 24 Oct 2006 23:49:13 -0000 1.2
@@ -1,10 +1,14 @@
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
+#include "mail-storage-private.h"
#include "fts-lucene-plugin.h"
+unsigned int fts_lucene_storage_module_id;
+
void fts_lucene_plugin_init(void)
{
+ fts_lucene_storage_module_id = mail_storage_module_id++;
fts_backend_register(&fts_backend_lucene);
}
Index: fts-lucene-plugin.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-lucene-plugin.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-lucene-plugin.h 17 Sep 2006 23:15:54 -0000 1.1
+++ fts-lucene-plugin.h 24 Oct 2006 23:49:13 -0000 1.2
@@ -3,7 +3,12 @@
#include "fts-api-private.h"
+#define LUCENE_CONTEXT(obj) \
+ *((void **)array_idx_modifiable(&(obj)->module_contexts, \
+ fts_lucene_storage_module_id))
+
extern struct fts_backend fts_backend_lucene;
+extern unsigned int fts_lucene_storage_module_id;
void fts_lucene_plugin_init(void);
void fts_lucene_plugin_deinit(void);
Index: lucene-wrapper.cc
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.cc,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- lucene-wrapper.cc 24 Oct 2006 21:51:04 -0000 1.2
+++ lucene-wrapper.cc 24 Oct 2006 23:49:14 -0000 1.3
@@ -14,17 +14,21 @@
using namespace lucene::index;
using namespace lucene::search;
using namespace lucene::queryParser;
+using namespace lucene::analysis;
struct lucene_index {
char *path;
+ char *mailbox_name;
+ TCHAR *tmailbox_name;
IndexReader *reader;
IndexWriter *writer;
IndexSearcher *searcher;
- lucene::analysis::standard::StandardAnalyzer *analyzer;
+ Analyzer *analyzer;
Document *doc;
uint32_t prev_uid, last_uid;
+ int32_t last_uid_doc_id;
};
static const uint8_t utf8_skip_table[256] = {
@@ -38,6 +42,39 @@
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
+class RawTokenStream : public TokenStream {
+ CL_NS(util)::Reader *reader;
+
+public:
+ RawTokenStream(CL_NS(util)::Reader *reader) {
+ this->reader = reader;
+ };
+
+ bool next(Token *token) {
+ const TCHAR *data;
+
+ int32_t len = this->reader->read(data);
+ if (len <= 0)
+ return false;
+
+ token->set(data, 0, len);
+ return true;
+ }
+
+ void close() { }
+};
+
+class DovecotAnalyzer : public standard::StandardAnalyzer {
+public:
+ TokenStream *tokenStream(const TCHAR *fieldName,
+ CL_NS(util)::Reader *reader) {
+ if (fieldName != 0 && wcscmp(fieldName, L"contents") != 0)
+ return _CLNEW RawTokenStream(reader);
+ return standard::StandardAnalyzer::
+ tokenStream(fieldName, reader);
+ }
+};
+
struct lucene_index *lucene_index_init(const char *path)
{
struct lucene_index *index;
@@ -58,10 +95,25 @@
void lucene_index_deinit(struct lucene_index *index)
{
lucene_index_close(index);
- i_free(index->path);
+ i_free(index->mailbox_name);
+ i_free(index->tmailbox_name);
i_free(index);
}
+int lucene_index_select_mailbox(struct lucene_index *index,
+ const char *mailbox_name)
+{
+ size_t len;
+
+ i_free(index->mailbox_name);
+ i_free(index->tmailbox_name);
+
+ len = strlen(mailbox_name);
+ index->mailbox_name = i_strdup(mailbox_name);
+ index->tmailbox_name = i_new(TCHAR, len + 1);
+ STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
+}
+
static int lucene_index_open(struct lucene_index *index)
{
if (index->reader != NULL)
@@ -89,10 +141,8 @@
if ((ret = lucene_index_open(index)) <= 0)
return ret;
- if (index->analyzer == NULL) {
- index->analyzer =
- _CLNEW lucene::analysis::standard::StandardAnalyzer();
- }
+ if (index->analyzer == NULL)
+ index->analyzer = _CLNEW DovecotAnalyzer();
index->searcher = _CLNEW IndexSearcher(index->reader);
return 1;
@@ -119,35 +169,73 @@
}
static int
-lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
+lucene_index_get_last_uid(struct lucene_index *index)
{
- int32_t max_docnum = index->reader->maxDoc();
-
- if (max_docnum == 0) {
- *last_uid_r = 0;
- return 0;
- }
+ int ret = 0;
- Document *doc = index->reader->document(max_docnum-1);
- if (lucene_doc_get_uid(index, doc, last_uid_r) < 0) {
- _CLDELETE(doc);
+ if (lucene_index_open_search(index) <= 0)
return -1;
+
+ Term mailbox_term(_T("box"), index->tmailbox_name);
+ Term last_uid_term(_T("last_uid"), _T("1"));
+ TermQuery mailbox_query(&mailbox_term);
+ TermQuery last_uid_query(&last_uid_term);
+
+ BooleanQuery query;
+ query.add(&mailbox_query, true, false);
+ query.add(&last_uid_query, true, false);
+
+ index->last_uid = 0;
+ index->last_uid_doc_id = -1;
+ try {
+ Hits *hits = index->searcher->search(&query);
+
+ if (hits->length() > 1) {
+ i_error("lucene: last_uid search for mailbox %s "
+ "returned multiple hits", index->mailbox_name);
+ }
+ for (int32_t i = 0; i < hits->length(); i++) {
+ uint32_t uid;
+
+ if (lucene_doc_get_uid(index, &hits->doc(i),
+ &uid) < 0) {
+ ret = -1;
+ break;
+ }
+
+ int32_t del_id = -1;
+ if (uid > index->last_uid) {
+ if (index->last_uid_doc_id >= 0)
+ del_id = index->last_uid_doc_id;
+ index->last_uid = uid;
+ index->last_uid_doc_id = hits->id(i);
+ } else {
+ del_id = hits->id(i);
+ }
+ if (del_id >= 0)
+ index->reader->deleteDocument(del_id);
+ }
+ _CLDELETE(hits);
+ } catch (CLuceneError &err) {
+ i_error("lucene: last_uid search failed: %s", err.what());
+ ret = -1;
}
- _CLDELETE(doc);
- return 0;
+ return ret;
}
int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
{
uint32_t last_uid = 0;
+ i_assert(index->mailbox_name != NULL);
+
if (lucene_index_open(index) < 0)
return -1;
if (index->reader == NULL)
index->last_uid = 0;
else {
- if (lucene_index_get_last_uid(index, &index->last_uid) < 0)
+ if (lucene_index_get_last_uid(index) < 0)
return -1;
}
*last_uid_r = index->last_uid;
@@ -156,7 +244,7 @@
return 0;
bool exists = IndexReader::indexExists(index->path);
- index->analyzer = _CLNEW lucene::analysis::standard::StandardAnalyzer();
+ index->analyzer = _CLNEW DovecotAnalyzer();
try {
index->writer = _CLNEW IndexWriter(index->path,
index->analyzer, !exists);
@@ -230,6 +318,7 @@
i_snprintf(id, sizeof(id), "%u", uid);
STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
index->doc->add(*Field::Text(_T("uid"), tid));
+ index->doc->add(*Field::Text(_T("box"), index->tmailbox_name));
}
index->doc->add(*Field::Text(_T("contents"), dest));
@@ -237,11 +326,41 @@
return 0;
}
+static int lucene_index_update_last_uid(struct lucene_index *index)
+{
+ Document doc;
+ char id[MAX_INT_STRLEN];
+ TCHAR tid[MAX_INT_STRLEN];
+
+ i_snprintf(id, sizeof(id), "%u", index->last_uid);
+ STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
+
+ doc.add(*Field::Text(_T("last_uid"), _T("1")));
+ doc.add(*Field::Text(_T("uid"), tid));
+ doc.add(*Field::Text(_T("box"), index->tmailbox_name));
+
+ try {
+ if (index->last_uid_doc_id >= 0) {
+ index->reader->deleteDocument(index->last_uid_doc_id);
+ index->last_uid_doc_id = -1;
+ }
+ index->writer->addDocument(&doc);
+ return 0;
+ } catch (CLuceneError &err) {
+ i_error("lucene: IndexWriter::addDocument(%s) failed: %s",
+ index->path, err.what());
+ return -1;
+ }
+}
+
int lucene_index_build_deinit(struct lucene_index *index)
{
int ret = 0;
+ if (index->prev_uid > index->last_uid)
+ index->last_uid = index->prev_uid;
index->prev_uid = 0;
+
if (index->writer == NULL) {
lucene_index_close(index);
return -1;
@@ -249,6 +368,8 @@
if (lucene_index_build_flush(index) < 0)
ret = -1;
+ if (lucene_index_update_last_uid(index) < 0)
+ ret = -1;
try {
index->writer->optimize();
@@ -287,10 +408,10 @@
lucene_utf8towcs(tkey, quoted_key, len + 1);
t_pop();
- Query *query = NULL;
+ Query *content_query = NULL;
try {
- query = QueryParser::parse(tkey, _T("contents"),
- index->analyzer);
+ content_query = QueryParser::parse(tkey, _T("contents"),
+ index->analyzer);
} catch (CLuceneError &err) {
if (getenv("DEBUG") != NULL) {
i_info("lucene: QueryParser::parse(%s) failed: %s",
@@ -300,8 +421,14 @@
return -1;
}
+ BooleanQuery query;
+ Term mailbox_term(_T("box"), index->tmailbox_name);
+ TermQuery mailbox_query(&mailbox_term);
+ query.add(content_query, true, false);
+ query.add(&mailbox_query, true, false);
+
try {
- Hits *hits = index->searcher->search(query);
+ Hits *hits = index->searcher->search(&query);
for (int32_t i = 0; i < hits->length(); i++) {
uint32_t uid;
@@ -321,7 +448,7 @@
ret = -1;
}
- _CLDELETE(query);
+ _CLDELETE(content_query);
lucene_index_close(index);
return ret;
}
Index: lucene-wrapper.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- lucene-wrapper.h 17 Sep 2006 23:15:54 -0000 1.1
+++ lucene-wrapper.h 24 Oct 2006 23:49:14 -0000 1.2
@@ -6,6 +6,9 @@
struct lucene_index *lucene_index_init(const char *path);
void lucene_index_deinit(struct lucene_index *index);
+int lucene_index_select_mailbox(struct lucene_index *index,
+ const char *mailbox_name);
+
int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r);
int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
const unsigned char *data, size_t size);
- Previous message: [dovecot-cvs] dovecot/src/plugins/fts fts-api-private.h, 1.2, 1.3 fts-api.c, 1.3, 1.4 fts-api.h, 1.2, 1.3 fts-storage.c, 1.5, 1.6
- Next message: [dovecot-cvs] dovecot/src/plugins/fts-lucene lucene-wrapper.cc, 1.3, 1.4
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the dovecot-cvs
mailing list