[dovecot-cvs] dovecot/src/plugins/fts-lucene fts-backend-lucene.c, 1.1, 1.2 fts-lucene-plugin.c, 1.1, 1.2 fts-lucene-plugin.h, 1.1, 1.2 lucene-wrapper.cc, 1.2, 1.3 lucene-wrapper.h, 1.1, 1.2

tss at dovecot.org tss at dovecot.org
Wed Oct 25 00:49:16 UTC 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts-lucene
In directory talvi:/tmp/cvs-serv7414

Modified Files:
	fts-backend-lucene.c fts-lucene-plugin.c fts-lucene-plugin.h 
	lucene-wrapper.cc lucene-wrapper.h 
Log Message:
Create only a single index, stored in INBOX's index directory. Includes
several fixes, but the code is still a bit buggy.



Index: fts-backend-lucene.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-backend-lucene.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-backend-lucene.c	17 Sep 2006 23:15:54 -0000	1.1
+++ fts-backend-lucene.c	24 Oct 2006 23:49:13 -0000	1.2
@@ -1,23 +1,64 @@
 /* Copyright (C) 2006 Timo Sirainen */
 
 #include "lib.h"
+#include "array.h"
+#include "mail-storage-private.h"
 #include "lucene-wrapper.h"
 #include "fts-lucene-plugin.h"
 
+#define LUCENE_INDEX_DIR_NAME "lucene-indexes"
+
+struct lucene_mail_storage {
+	struct lucene_index *index;
+	struct mailbox *selected_box;
+	int refcount;
+};
+
 struct lucene_fts_backend {
 	struct fts_backend backend;
-	struct lucene_index *index;
+	struct lucene_mail_storage *lstorage;
+	struct mailbox *box;
 
 	uint32_t last_uid;
 };
 
-static struct fts_backend *fts_backend_lucene_init(const char *path)
+static void fts_backend_select(struct lucene_fts_backend *backend)
+{
+	if (backend->lstorage->selected_box != backend->box) {
+		lucene_index_select_mailbox(backend->lstorage->index,
+					    mailbox_get_name(backend->box));
+		backend->lstorage->selected_box = backend->box;
+	}
+}
+
+static struct fts_backend *fts_backend_lucene_init(struct mailbox *box)
 {
+	struct lucene_mail_storage *lstorage;
 	struct lucene_fts_backend *backend;
+	const char *path;
+
+	lstorage = LUCENE_CONTEXT(box->storage);
+	if (lstorage == NULL) {
+		path = mail_storage_get_mailbox_index_dir(box->storage,
+							  "INBOX");
+		if (path == NULL) {
+			/* in-memory indexes */
+			return NULL;
+		}
+
+		path = t_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
+
+		lstorage = i_new(struct lucene_mail_storage, 1);
+		lstorage->index = lucene_index_init(path);
+		array_idx_set(&box->storage->module_contexts,
+			      fts_lucene_storage_module_id, &lstorage);
+	}
+	lstorage->refcount++;
 
 	backend = i_new(struct lucene_fts_backend, 1);
 	backend->backend = fts_backend_lucene;
-	backend->index = lucene_index_init(path);
+	backend->lstorage = lstorage;
+	backend->box = box;
 	return &backend->backend;
 }
 
@@ -26,7 +67,12 @@
 	struct lucene_fts_backend *backend =
 		(struct lucene_fts_backend *)_backend;
 
-	lucene_index_deinit(backend->index);
+	if (--backend->lstorage->refcount == 0) {
+		array_idx_clear(&backend->box->storage->module_contexts,
+				fts_lucene_storage_module_id);
+		lucene_index_deinit(backend->lstorage->index);
+		i_free(backend->lstorage);
+	}
 	i_free(backend);
 }
 
@@ -37,9 +83,12 @@
 		(struct lucene_fts_backend *)_backend;
 	struct fts_backend_build_context *ctx;
 
+	fts_backend_select(backend);
+
 	ctx = i_new(struct fts_backend_build_context, 1);
 	ctx->backend = _backend;
-	if (lucene_index_build_init(backend->index, &backend->last_uid) < 0)
+	if (lucene_index_build_init(backend->lstorage->index,
+				    &backend->last_uid) < 0)
 		ctx->failed = TRUE;
 
 	*last_uid_r = backend->last_uid;
@@ -60,7 +109,9 @@
 	i_assert(uid >= backend->last_uid);
 	backend->last_uid = uid;
 
-	return lucene_index_build_more(backend->index, uid, data, size);
+	i_assert(backend->lstorage->selected_box == backend->box);
+	return lucene_index_build_more(backend->lstorage->index,
+				       uid, data, size);
 }
 
 static int
@@ -70,7 +121,8 @@
 		(struct lucene_fts_backend *)ctx->backend;
 	int ret = ctx->failed ? -1 : 0;
 
-	lucene_index_build_deinit(backend->index);
+	i_assert(backend->lstorage->selected_box == backend->box);
+	lucene_index_build_deinit(backend->lstorage->index);
 	i_free(ctx);
 	return ret;
 }
@@ -82,7 +134,8 @@
 	struct lucene_fts_backend *backend =
 		(struct lucene_fts_backend *)_backend;
 
-	return lucene_index_lookup(backend->index, key, result);
+	fts_backend_select(backend);
+	return lucene_index_lookup(backend->lstorage->index, key, result);
 }
 
 static int
@@ -92,7 +145,8 @@
 	struct lucene_fts_backend *backend =
 		(struct lucene_fts_backend *)_backend;
 
-	return lucene_index_filter(backend->index, key, result);
+	fts_backend_select(backend);
+	return lucene_index_filter(backend->lstorage->index, key, result);
 }
 
 struct fts_backend fts_backend_lucene = {

Index: fts-lucene-plugin.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-lucene-plugin.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-lucene-plugin.c	17 Sep 2006 23:15:54 -0000	1.1
+++ fts-lucene-plugin.c	24 Oct 2006 23:49:13 -0000	1.2
@@ -1,10 +1,14 @@
 /* Copyright (C) 2006 Timo Sirainen */
 
 #include "lib.h"
+#include "mail-storage-private.h"
 #include "fts-lucene-plugin.h"
 
+unsigned int fts_lucene_storage_module_id;
+
 void fts_lucene_plugin_init(void)
 {
+	fts_lucene_storage_module_id = mail_storage_module_id++;
 	fts_backend_register(&fts_backend_lucene);
 }
 

Index: fts-lucene-plugin.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-lucene-plugin.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- fts-lucene-plugin.h	17 Sep 2006 23:15:54 -0000	1.1
+++ fts-lucene-plugin.h	24 Oct 2006 23:49:13 -0000	1.2
@@ -3,7 +3,12 @@
 
 #include "fts-api-private.h"
 
+#define LUCENE_CONTEXT(obj) \
+	*((void **)array_idx_modifiable(&(obj)->module_contexts, \
+					fts_lucene_storage_module_id))
+
 extern struct fts_backend fts_backend_lucene;
+extern unsigned int fts_lucene_storage_module_id;
 
 void fts_lucene_plugin_init(void);
 void fts_lucene_plugin_deinit(void);

Index: lucene-wrapper.cc
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.cc,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- lucene-wrapper.cc	24 Oct 2006 21:51:04 -0000	1.2
+++ lucene-wrapper.cc	24 Oct 2006 23:49:14 -0000	1.3
@@ -14,17 +14,21 @@
 using namespace lucene::index;
 using namespace lucene::search;
 using namespace lucene::queryParser;
+using namespace lucene::analysis;
 
 struct lucene_index {
 	char *path;
+	char *mailbox_name;
+	TCHAR *tmailbox_name;
 
 	IndexReader *reader;
 	IndexWriter *writer;
 	IndexSearcher *searcher;
-	lucene::analysis::standard::StandardAnalyzer *analyzer;
+	Analyzer *analyzer;
 
 	Document *doc;
 	uint32_t prev_uid, last_uid;
+	int32_t last_uid_doc_id;
 };
 
 static const uint8_t utf8_skip_table[256] = {
@@ -38,6 +42,39 @@
 	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
 };
 
+class RawTokenStream : public TokenStream {
+	CL_NS(util)::Reader *reader;
+
+public:
+	RawTokenStream(CL_NS(util)::Reader *reader) {
+		this->reader = reader;
+	};
+
+	bool next(Token *token) {
+		const TCHAR *data;
+
+		int32_t len = this->reader->read(data);
+		if (len <= 0)
+			return false;
+
+		token->set(data, 0, len);
+		return true;
+	}
+
+	void close() { }
+};
+
+class DovecotAnalyzer : public standard::StandardAnalyzer {
+public:
+	TokenStream *tokenStream(const TCHAR *fieldName,
+				 CL_NS(util)::Reader *reader) {
+		if (fieldName != 0 && wcscmp(fieldName, L"contents") != 0)
+			return _CLNEW RawTokenStream(reader);
+		return standard::StandardAnalyzer::
+			tokenStream(fieldName, reader);
+	}
+};
+
 struct lucene_index *lucene_index_init(const char *path)
 {
 	struct lucene_index *index;
@@ -58,10 +95,25 @@
 void lucene_index_deinit(struct lucene_index *index)
 {
 	lucene_index_close(index);
-	i_free(index->path);
+	i_free(index->mailbox_name);
+	i_free(index->tmailbox_name);
 	i_free(index);
 }
 
+int lucene_index_select_mailbox(struct lucene_index *index,
+				const char *mailbox_name)
+{
+	size_t len;
+
+	i_free(index->mailbox_name);
+	i_free(index->tmailbox_name);
+
+	len = strlen(mailbox_name);
+	index->mailbox_name = i_strdup(mailbox_name);
+	index->tmailbox_name = i_new(TCHAR, len + 1);
+	STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
+}
+
 static int lucene_index_open(struct lucene_index *index)
 {
 	if (index->reader != NULL)
@@ -89,10 +141,8 @@
 	if ((ret = lucene_index_open(index)) <= 0)
 		return ret;
 
-	if (index->analyzer == NULL) {
-		index->analyzer =
-			_CLNEW lucene::analysis::standard::StandardAnalyzer();
-	}
+	if (index->analyzer == NULL)
+		index->analyzer = _CLNEW DovecotAnalyzer();
 
 	index->searcher = _CLNEW IndexSearcher(index->reader);
 	return 1;
@@ -119,35 +169,73 @@
 }
 
 static int
-lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
+lucene_index_get_last_uid(struct lucene_index *index)
 {
-	int32_t max_docnum = index->reader->maxDoc();
-
-	if (max_docnum == 0) {
-		*last_uid_r = 0;
-		return 0;
-	}
+	int ret = 0;
 
-	Document *doc = index->reader->document(max_docnum-1);
-	if (lucene_doc_get_uid(index, doc, last_uid_r) < 0) {
-		_CLDELETE(doc);
+	if (lucene_index_open_search(index) <= 0)
 		return -1;
+
+	Term mailbox_term(_T("box"), index->tmailbox_name);
+	Term last_uid_term(_T("last_uid"), _T("1"));
+	TermQuery mailbox_query(&mailbox_term);
+	TermQuery last_uid_query(&last_uid_term);
+
+	BooleanQuery query;
+	query.add(&mailbox_query, true, false);
+	query.add(&last_uid_query, true, false);
+
+	index->last_uid = 0;
+	index->last_uid_doc_id = -1;
+	try {
+		Hits *hits = index->searcher->search(&query);
+
+		if (hits->length() > 1) {
+			i_error("lucene: last_uid search for mailbox %s "
+				"returned multiple hits", index->mailbox_name);
+		}
+		for (int32_t i = 0; i < hits->length(); i++) {
+			uint32_t uid;
+
+			if (lucene_doc_get_uid(index, &hits->doc(i),
+					       &uid) < 0) {
+				ret = -1;
+				break;
+			}
+
+			int32_t del_id = -1;
+			if (uid > index->last_uid) {
+				if (index->last_uid_doc_id >= 0)
+					del_id = index->last_uid_doc_id;
+				index->last_uid = uid;
+				index->last_uid_doc_id = hits->id(i);
+			} else {
+				del_id = hits->id(i);
+			}
+			if (del_id >= 0)
+				index->reader->deleteDocument(del_id);
+		}
+		_CLDELETE(hits);
+	} catch (CLuceneError &err) {
+		i_error("lucene: last_uid search failed: %s", err.what());
+		ret = -1;
 	}
-	_CLDELETE(doc);
-	return 0;
+	return ret;
 }
 
 int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
 {
 	uint32_t last_uid = 0;
 
+	i_assert(index->mailbox_name != NULL);
+
 	if (lucene_index_open(index) < 0)
 		return -1;
 
 	if (index->reader == NULL)
 		index->last_uid = 0;
 	else {
-		if (lucene_index_get_last_uid(index, &index->last_uid) < 0)
+		if (lucene_index_get_last_uid(index) < 0)
 			return -1;
 	}
 	*last_uid_r = index->last_uid;
@@ -156,7 +244,7 @@
 		return 0;
 
 	bool exists = IndexReader::indexExists(index->path);
-	index->analyzer = _CLNEW lucene::analysis::standard::StandardAnalyzer();
+	index->analyzer = _CLNEW DovecotAnalyzer();
 	try {
 		index->writer = _CLNEW IndexWriter(index->path,
 						   index->analyzer, !exists);
@@ -230,6 +318,7 @@
 		i_snprintf(id, sizeof(id), "%u", uid);
 		STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
 		index->doc->add(*Field::Text(_T("uid"), tid));
+		index->doc->add(*Field::Text(_T("box"), index->tmailbox_name));
 	}
 
 	index->doc->add(*Field::Text(_T("contents"), dest));
@@ -237,11 +326,41 @@
 	return 0;
 }
 
+static int lucene_index_update_last_uid(struct lucene_index *index)
+{
+	Document doc;
+	char id[MAX_INT_STRLEN];
+	TCHAR tid[MAX_INT_STRLEN];
+
+	i_snprintf(id, sizeof(id), "%u", index->last_uid);
+	STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
+
+	doc.add(*Field::Text(_T("last_uid"), _T("1")));
+	doc.add(*Field::Text(_T("uid"), tid));
+	doc.add(*Field::Text(_T("box"), index->tmailbox_name));
+
+	try {
+		if (index->last_uid_doc_id >= 0) {
+			index->reader->deleteDocument(index->last_uid_doc_id);
+			index->last_uid_doc_id = -1;
+		}
+		index->writer->addDocument(&doc);
+		return 0;
+	} catch (CLuceneError &err) {
+		i_error("lucene: IndexWriter::addDocument(%s) failed: %s",
+			index->path, err.what());
+		return -1;
+	}
+}
+
 int lucene_index_build_deinit(struct lucene_index *index)
 {
 	int ret = 0;
 
+	if (index->prev_uid > index->last_uid)
+		index->last_uid = index->prev_uid;
 	index->prev_uid = 0;
+
 	if (index->writer == NULL) {
 		lucene_index_close(index);
 		return -1;
@@ -249,6 +368,8 @@
 
 	if (lucene_index_build_flush(index) < 0)
 		ret = -1;
+	if (lucene_index_update_last_uid(index) < 0)
+		ret = -1;
 
 	try {
 		index->writer->optimize();
@@ -287,10 +408,10 @@
 	lucene_utf8towcs(tkey, quoted_key, len + 1);
 	t_pop();
 
-	Query *query = NULL;
+	Query *content_query = NULL;
 	try {
-		query = QueryParser::parse(tkey, _T("contents"),
-					   index->analyzer);
+		content_query = QueryParser::parse(tkey, _T("contents"),
+						   index->analyzer);
 	} catch (CLuceneError &err) {
 		if (getenv("DEBUG") != NULL) {
 			i_info("lucene: QueryParser::parse(%s) failed: %s",
@@ -300,8 +421,14 @@
 		return -1;
 	}
 
+	BooleanQuery query;
+	Term mailbox_term(_T("box"), index->tmailbox_name);
+	TermQuery mailbox_query(&mailbox_term);
+	query.add(content_query, true, false);
+	query.add(&mailbox_query, true, false);
+
 	try {
-		Hits *hits = index->searcher->search(query);
+		Hits *hits = index->searcher->search(&query);
 
 		for (int32_t i = 0; i < hits->length(); i++) {
 			uint32_t uid;
@@ -321,7 +448,7 @@
 		ret = -1;
 	}
 
-	_CLDELETE(query);
+	_CLDELETE(content_query);
 	lucene_index_close(index);
 	return ret;
 }

Index: lucene-wrapper.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- lucene-wrapper.h	17 Sep 2006 23:15:54 -0000	1.1
+++ lucene-wrapper.h	24 Oct 2006 23:49:14 -0000	1.2
@@ -6,6 +6,9 @@
 struct lucene_index *lucene_index_init(const char *path);
 void lucene_index_deinit(struct lucene_index *index);
 
+int lucene_index_select_mailbox(struct lucene_index *index,
+				const char *mailbox_name);
+
 int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r);
 int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
 			    const unsigned char *data, size_t size);



More information about the dovecot-cvs mailing list