[dovecot-cvs] dovecot/src/plugins/fts-lucene fts-backend-lucene.c, 1.2, 1.3 lucene-wrapper.cc, 1.4, 1.5 lucene-wrapper.h, 1.2, 1.3

tss at dovecot.org tss at dovecot.org
Wed Oct 25 21:03:01 UTC 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts-lucene
In directory talvi:/tmp/cvs-serv5509

Modified Files:
	fts-backend-lucene.c lucene-wrapper.cc lucene-wrapper.h 
Log Message:
Fixes and cleanups



Index: fts-backend-lucene.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/fts-backend-lucene.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- fts-backend-lucene.c	24 Oct 2006 23:49:13 -0000	1.2
+++ fts-backend-lucene.c	25 Oct 2006 20:02:59 -0000	1.3
@@ -76,6 +76,17 @@
 	i_free(backend);
 }
 
+static int
+fts_backend_lucene_get_last_uid(struct fts_backend *_backend,
+				uint32_t *last_uid_r)
+{
+	struct lucene_fts_backend *backend =
+		(struct lucene_fts_backend *)_backend;
+
+	fts_backend_select(backend);
+	return lucene_index_get_last_uid(backend->lstorage->index, last_uid_r);
+}
+
 static struct fts_backend_build_context *
 fts_backend_lucene_build_init(struct fts_backend *_backend, uint32_t *last_uid_r)
 {
@@ -156,6 +167,7 @@
 	{
 		fts_backend_lucene_init,
 		fts_backend_lucene_deinit,
+		fts_backend_lucene_get_last_uid,
 		fts_backend_lucene_build_init,
 		fts_backend_lucene_build_more,
 		fts_backend_lucene_build_deinit,

Index: lucene-wrapper.cc
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.cc,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- lucene-wrapper.cc	25 Oct 2006 00:30:33 -0000	1.4
+++ lucene-wrapper.cc	25 Oct 2006 20:02:59 -0000	1.5
@@ -28,7 +28,6 @@
 
 	Document *doc;
 	uint32_t prev_uid, last_uid;
-	int32_t last_uid_doc_id;
 };
 
 static const uint8_t utf8_skip_table[256] = {
@@ -68,8 +67,12 @@
 public:
 	TokenStream *tokenStream(const TCHAR *fieldName,
 				 CL_NS(util)::Reader *reader) {
+		/* Everything except contents should go as-is without any
+		   modifications. Isn't there any easier way to do this than
+		   to implement a whole new RawTokenStream?.. */
 		if (fieldName != 0 && wcscmp(fieldName, L"contents") != 0)
 			return _CLNEW RawTokenStream(reader);
+
 		return standard::StandardAnalyzer::
 			tokenStream(fieldName, reader);
 	}
@@ -81,6 +84,7 @@
 
 	index = i_new(struct lucene_index, 1);
 	index->path = i_strdup(path);
+	index->analyzer = _CLNEW DovecotAnalyzer();
 	return index;
 }
 
@@ -89,12 +93,12 @@
 	_CLDELETE(index->reader);
 	_CLDELETE(index->writer);
 	_CLDELETE(index->searcher);
-	_CLDELETE(index->analyzer);
 }
 
 void lucene_index_deinit(struct lucene_index *index)
 {
 	lucene_index_close(index);
+	_CLDELETE(index->analyzer);
 	i_free(index->mailbox_name);
 	i_free(index->tmailbox_name);
 	i_free(index);
@@ -141,17 +145,15 @@
 	if ((ret = lucene_index_open(index)) <= 0)
 		return ret;
 
-	if (index->analyzer == NULL)
-		index->analyzer = _CLNEW DovecotAnalyzer();
-
 	index->searcher = _CLNEW IndexSearcher(index->reader);
 	return 1;
 }
 
-static int lucene_doc_get_uid(struct lucene_index *index,
-			      Document *doc, uint32_t *uid_r)
+static int
+lucene_doc_get_uid(struct lucene_index *index, Document *doc,
+		   const TCHAR *field_name, uint32_t *uid_r)
 {
-	Field *field = doc->getField(_T("uid"));
+	Field *field = doc->getField(field_name);
 	TCHAR *uid = field == NULL ? NULL : field->stringValue();
 	if (uid == NULL) {
 		i_error("lucene: Corrupted FTS index %s: No UID for document",
@@ -169,52 +171,55 @@
 }
 
 static int
-lucene_index_get_last_uid(struct lucene_index *index)
+lucene_index_get_last_uid_int(struct lucene_index *index, bool delete_old)
 {
 	int ret = 0;
+	bool deleted = false;
 
 	index->last_uid = 0;
-	index->last_uid_doc_id = -1;
 
-	if (lucene_index_open_search(index) <= 0)
-		return -1;
+	if ((ret = lucene_index_open_search(index)) <= 0)
+		return ret;
 
+	/* find all the existing last_uids for selected mailbox.
+	   if there are more than one, delete the smaller ones. this is normal
+	   behavior because we can't update/delete documents in writer, so
+	   we'll do it only in here.. */
 	Term mailbox_term(_T("box"), index->tmailbox_name);
-	Term last_uid_term(_T("last_uid"), _T("1"));
+	Term last_uid_term(_T("last_uid"), _T("*"));
 	TermQuery mailbox_query(&mailbox_term);
-	TermQuery last_uid_query(&last_uid_term);
+	WildcardQuery last_uid_query(&last_uid_term);
 
 	BooleanQuery query;
 	query.add(&mailbox_query, true, false);
 	query.add(&last_uid_query, true, false);
 
+	int32_t last_doc_id = -1;
 	try {
 		Hits *hits = index->searcher->search(&query);
 
-		if (hits->length() > 1) {
-			i_error("lucene: last_uid search for mailbox %s "
-				"returned multiple hits", index->mailbox_name);
-		}
 		for (int32_t i = 0; i < hits->length(); i++) {
 			uint32_t uid;
 
 			if (lucene_doc_get_uid(index, &hits->doc(i),
-					       &uid) < 0) {
+					       _T("last_uid"), &uid) < 0) {
 				ret = -1;
 				break;
 			}
 
 			int32_t del_id = -1;
 			if (uid > index->last_uid) {
-				if (index->last_uid_doc_id >= 0)
-					del_id = index->last_uid_doc_id;
+				if (last_doc_id >= 0)
+					del_id = last_doc_id;
 				index->last_uid = uid;
-				index->last_uid_doc_id = hits->id(i);
+				last_doc_id = hits->id(i);
 			} else {
 				del_id = hits->id(i);
 			}
-			/*if (del_id >= 0)
-				index->reader->deleteDocument(del_id);*/
+			if (del_id >= 0 && delete_old) {
+				index->reader->deleteDocument(del_id);
+				deleted = true;
+			}
 		}
 		_CLDELETE(hits);
 	} catch (CLuceneError &err) {
@@ -222,9 +227,25 @@
 		ret = -1;
 	}
 
+	if (deleted) {
+		/* the index was modified. we'll need to release the locks
+		   before opening a writer */
+		lucene_index_close(index);
+	}
 	return ret;
 }
 
+int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
+{
+	/* delete the old last_uids in here, since we've not write-locked
+	   the index yet */
+	if (lucene_index_get_last_uid_int(index, true) < 0)
+		return -1;
+
+	*last_uid_r = index->last_uid;
+	return 0;
+}
+
 int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
 {
 	uint32_t last_uid = 0;
@@ -232,23 +253,8 @@
 	i_assert(index->mailbox_name != NULL);
 
 	lucene_index_close(index);
-	if (lucene_index_open(index) < 0)
-		return -1;
-
-	if (index->reader == NULL) {
-		index->last_uid = 0;
-		index->last_uid_doc_id = -1;
-	} else {
-		if (lucene_index_get_last_uid(index) < 0)
-			return -1;
-	}
-	*last_uid_r = index->last_uid;
-
-	if (index->writer != NULL)
-		return 0;
 
 	bool exists = IndexReader::indexExists(index->path);
-	index->analyzer = _CLNEW DovecotAnalyzer();
 	try {
 		index->writer = _CLNEW IndexWriter(index->path,
 						   index->analyzer, !exists);
@@ -257,8 +263,11 @@
 			index->path, err.what());
 		return -1;
 	}
-
 	index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
+
+	if (lucene_index_get_last_uid_int(index, false) < 0)
+		return -1;
+	*last_uid_r = index->last_uid;
 	return 0;
 }
 
@@ -339,22 +348,10 @@
 	i_snprintf(id, sizeof(id), "%u", index->last_uid);
 	STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
 
-	doc.add(*Field::Text(_T("last_uid"), _T("1")));
-	doc.add(*Field::Text(_T("uid"), tid));
+	doc.add(*Field::Text(_T("last_uid"), tid));
 	doc.add(*Field::Text(_T("box"), index->tmailbox_name));
 
 	try {
-		if (index->last_uid_doc_id >= 0) {
-			//index->reader->deleteDocument(index->last_uid_doc_id);
-			index->last_uid_doc_id = -1;
-		}
-	} catch (CLuceneError &err) {
-		i_error("lucene: IndexWriter::deleteDocument(%s) failed: %s",
-			index->path, err.what());
-		return -1;
-	}
-
-	try {
 		index->writer->addDocument(&doc);
 		return 0;
 	} catch (CLuceneError &err) {
@@ -450,7 +447,7 @@
 			uint32_t uid;
 
 			if (lucene_doc_get_uid(index, &hits->doc(i),
-					       &uid) < 0) {
+					       _T("uid"), &uid) < 0) {
 				ret = -1;
 				break;
 			}

Index: lucene-wrapper.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-lucene/lucene-wrapper.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- lucene-wrapper.h	24 Oct 2006 23:49:14 -0000	1.2
+++ lucene-wrapper.h	25 Oct 2006 20:02:59 -0000	1.3
@@ -8,6 +8,7 @@
 
 int lucene_index_select_mailbox(struct lucene_index *index,
 				const char *mailbox_name);
+int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r);
 
 int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r);
 int lucene_index_build_more(struct lucene_index *index, uint32_t uid,



More information about the dovecot-cvs mailing list