dovecot-1.1: Initial code to support Apache Solr (Lucene indexing server).

dovecot at dovecot.org dovecot at dovecot.org
Sun Jul 13 15:37:50 EEST 2008


details:   http://hg.dovecot.org/dovecot-1.1/rev/08daafe3ffb0
changeset: 7746:08daafe3ffb0
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Jul 13 15:17:15 2008 +0300
description:
Initial code to support Apache Solr (Lucene indexing server).

diffstat:

10 files changed, 930 insertions(+), 2 deletions(-)
configure.in                            |   40 ++
src/plugins/Makefile.am                 |    6 
src/plugins/fts-solr/Makefile.am        |   32 ++
src/plugins/fts-solr/fts-backend-solr.c |  299 ++++++++++++++++++++
src/plugins/fts-solr/fts-solr-plugin.c  |   16 +
src/plugins/fts-solr/fts-solr-plugin.h  |   11 
src/plugins/fts-solr/schema.xml         |   50 +++
src/plugins/fts-solr/solr-connection.c  |  454 +++++++++++++++++++++++++++++++
src/plugins/fts-solr/solr-connection.h  |   22 +
src/plugins/fts/fts-storage.c           |    2 

diffs (truncated from 1004 to 300 lines):

diff -r 97245d54efee -r 08daafe3ffb0 configure.in
--- a/configure.in	Sat Jul 12 16:24:47 2008 +0300
+++ b/configure.in	Sun Jul 13 15:17:15 2008 +0300
@@ -262,6 +262,15 @@ AC_ARG_WITH(lucene,
 	fi,
 	want_lucene=no)
 AM_CONDITIONAL(BUILD_LUCENE, test "$want_lucene" = "yes")
+
+AC_ARG_WITH(solr,
+[  --with-solr             Build with Solr full text search support],
+	if test x$withval = xno || test x$withval = xauto; then
+		want_solr=$withval
+	else
+		want_solr=yes
+	fi,
+	want_solr=no)
 
 AC_ARG_WITH(ssl,
 [  --with-ssl=gnutls|openssl Build with GNUTLS or OpenSSL (default)],
@@ -2209,6 +2218,36 @@ fi
 fi
 AM_CONDITIONAL(HAVE_RQUOTA, test "$have_rquota" = "yes")
 
+if test "$want_solr" != "no"; then
+  AC_CHECK_PROG(CURLCONFIG, curl-config, YES, NO)
+  if test $CURLCONFIG = YES; then
+    CURL_CFLAGS=`curl-config --cflags`
+    CURL_LIBS=`curl-config --libs`
+    
+    dnl libcurl found, also need libexpat
+    AC_CHECK_LIB(expat, XML_Parse, [
+      AC_CHECK_HEADER(expat.h, [
+        AC_SUBST(CURL_CFLAGS)
+        AC_SUBST(CURL_LIBS)
+	have_solr=yes
+      ], [
+	if test $want_solr = yes; then
+	  AC_ERROR([Can't build with Solr support: expat.h not found])
+	fi
+      ])
+    ], [
+      if test $want_solr = yes; then
+	AC_ERROR([Can't build with Solr support: libexpat not found])
+      fi
+    ])
+  else
+    if test $want_solr = yes; then
+      AC_ERROR([Can't build with Solr support: curl-config not found])
+    fi
+  fi
+fi
+AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes")
+
 dnl **
 dnl ** capabilities
 dnl **
@@ -2267,6 +2306,7 @@ src/plugins/expire/Makefile
 src/plugins/expire/Makefile
 src/plugins/fts/Makefile
 src/plugins/fts-lucene/Makefile
+src/plugins/fts-solr/Makefile
 src/plugins/fts-squat/Makefile
 src/plugins/lazy-expunge/Makefile
 src/plugins/mail-log/Makefile
diff -r 97245d54efee -r 08daafe3ffb0 src/plugins/Makefile.am
--- a/src/plugins/Makefile.am	Sat Jul 12 16:24:47 2008 +0300
+++ b/src/plugins/Makefile.am	Sun Jul 13 15:17:15 2008 +0300
@@ -6,7 +6,11 @@ FTS_LUCENE = fts-lucene
 FTS_LUCENE = fts-lucene
 endif
 
+if BUILD_SOLR
+FTS_SOLR = fts-solr
+endif
+
 SUBDIRS = \
 	acl convert expire fts fts-squat lazy-expunge mail-log mbox-snarf \
 	quota imap-quota trash \
-	$(ZLIB) $(FTS_LUCENE)
+	$(ZLIB) $(FTS_LUCENE) $(FTS_SOLR)
diff -r 97245d54efee -r 08daafe3ffb0 src/plugins/fts-solr/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/Makefile.am	Sun Jul 13 15:17:15 2008 +0300
@@ -0,0 +1,32 @@
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-mail \
+	-I$(top_srcdir)/src/lib-index \
+	-I$(top_srcdir)/src/lib-storage \
+	-I$(top_srcdir)/src/plugins/fts $(CURL_CFLAGS)
+
+lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+	lib21_fts_solr_plugin.la
+
+lib21_fts_solr_plugin_la_LIBADD = \
+	$(CURL_LIBS) -lexpat
+
+lib21_fts_solr_plugin_la_SOURCES = \
+	fts-backend-solr.c \
+	fts-solr-plugin.c \
+	solr-connection.c
+
+noinst_HEADERS = \
+	fts-solr-plugin.h \
+	solr-connection.h
+
+EXTRA_DIST = schema.xml
+
+install-exec-local:
+	for d in imap lda; do \
+	  $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
+	  rm -f $(DESTDIR)$(moduledir)/$$d/lib21_fts_solr_plugin$(MODULE_SUFFIX); \
+	  $(LN_S) ../lib21_fts_solr_plugin$(MODULE_SUFFIX) $(DESTDIR)$(moduledir)/$$d; \
+	done
diff -r 97245d54efee -r 08daafe3ffb0 src/plugins/fts-solr/fts-backend-solr.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sun Jul 13 15:17:15 2008 +0300
@@ -0,0 +1,299 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "mail-storage-private.h"
+#include "solr-connection.h"
+#include "fts-solr-plugin.h"
+
+#include <stdlib.h>
+#include <curl/curl.h>
+
+/* Per-build state for the Solr backend. The build callbacks receive a
+   pointer to the embedded generic context and cast it back to this struct. */
+struct solr_fts_backend_build_context {
+	/* generic build context; first member so the cast in
+	   fts_backend_solr_build_more() is valid */
+	struct fts_backend_build_context ctx;
+
+	/* POST request opened with solr_connection_post_begin() */
+	struct solr_connection_post *post;
+	/* prev_uid: UID given to the previous build_more() call; 0 is treated
+	   as "no message started yet".
+	   uid_validity: mailbox UIDVALIDITY read in build_init(). */
+	uint32_t prev_uid, uid_validity;
+	/* scratch buffer used to assemble the XML fragments that are posted */
+	string_t *cmd;
+	/* value of the "headers" argument when the document for prev_uid
+	   was started */
+	bool headers;
+};
+
+/* File-wide Solr connection; created on first use by
+   fts_backend_solr_init(). */
+static struct solr_connection *solr_conn = NULL;
+
+/* Append str to dest, quoted by solr_connection_quote_str() using the
+   file-wide connection. */
+static void solr_quote_str(string_t *dest, const char *str)
+{
+	solr_connection_quote_str(solr_conn, dest, str);
+}
+
+/* Append the NUL-terminated string str to dest, replacing the XML markup
+   characters '&', '<' and '>' with their entity references. All other
+   bytes are copied through unchanged. */
+static void xml_encode(string_t *dest, const char *str)
+{
+	const char *p;
+
+	for (p = str; *p != '\0'; p++) {
+		if (*p == '&')
+			str_append(dest, "&amp;");
+		else if (*p == '<')
+			str_append(dest, "&lt;");
+		else if (*p == '>')
+			str_append(dest, "&gt;");
+		else
+			str_append_c(dest, *p);
+	}
+}
+
+/* Create a Solr FTS backend instance. The mailbox argument is unused.
+   The file-wide Solr connection is opened on the first call, using the
+   value of the FTS_SOLR environment variable; later calls reuse it.
+   Returns a newly allocated copy of the fts_backend_solr template. */
+static struct fts_backend *
+fts_backend_solr_init(struct mailbox *box ATTR_UNUSED)
+{
+	struct fts_backend *solr_backend;
+
+	if (solr_conn == NULL) {
+		const char *solr_env = getenv("FTS_SOLR");
+
+		solr_conn = solr_connection_init(solr_env);
+	}
+
+	solr_backend = i_new(struct fts_backend, 1);
+	*solr_backend = fts_backend_solr;
+	return solr_backend;
+}
+
+/* Free a backend instance allocated by fts_backend_solr_init(). The
+   file-wide Solr connection is not touched here. */
+static void fts_backend_solr_deinit(struct fts_backend *backend)
+{
+	i_free(backend);
+}
+
+/* Ask Solr for the highest indexed UID of the backend's mailbox.
+
+   The query selects only the "uid" field, sorted descending and limited to
+   one row, restricted to the mailbox's current UIDVALIDITY, mailbox name
+   and user.
+
+   Returns 0 and sets *last_uid_r (0 when nothing matched), or -1 if the
+   select failed or the reply held more than a single UID. */
+static int fts_backend_solr_get_last_uid(struct fts_backend *backend,
+					 uint32_t *last_uid_r)
+{
+	struct mailbox_status box_status;
+	ARRAY_TYPE(seq_range) found_uids;
+	const struct seq_range *ranges;
+	unsigned int range_count;
+	string_t *query;
+
+	query = t_str_new(256);
+	str_append(query, "fl=uid&rows=1&sort=uid%20desc&q=");
+
+	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &box_status);
+	str_printfa(query, "uidv:%u%%20box:", box_status.uidvalidity);
+	solr_quote_str(query, backend->box->name);
+	str_append(query, "%20user:");
+	solr_quote_str(query, backend->box->storage->user);
+
+	t_array_init(&found_uids, 1);
+	if (solr_connection_select(solr_conn, str_c(query), &found_uids) < 0)
+		return -1;
+
+	ranges = array_get(&found_uids, &range_count);
+	if (range_count == 0) {
+		/* nothing indexed yet for this mailbox */
+		*last_uid_r = 0;
+		return 0;
+	}
+	/* anything other than exactly one single-UID range is unexpected */
+	if (range_count != 1 || ranges[0].seq1 != ranges[0].seq2) {
+		i_error("fts_solr: Last UID lookup returned multiple rows");
+		return -1;
+	}
+
+	*last_uid_r = ranges[0].seq1;
+	return 0;
+}
+
+/* Start an index build for the backend's mailbox.
+
+   Allocates the Solr build context, opens a POST on the file-wide
+   connection, creates the scratch command buffer and records the mailbox's
+   UIDVALIDITY. The generic part of the context is returned in *ctx_r.
+
+   *last_uid_r is always set to (uint32_t)-1.
+   NOTE(review): presumably this tells the caller that no last UID is
+   reported from here -- confirm against the fts-storage.c caller.
+
+   Always returns 0. */
+static int
+fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
+			    struct fts_backend_build_context **ctx_r)
+{
+	struct solr_fts_backend_build_context *build;
+	struct mailbox_status box_status;
+
+	build = i_new(struct solr_fts_backend_build_context, 1);
+	build->ctx.backend = backend;
+	build->post = solr_connection_post_begin(solr_conn);
+	build->cmd = str_new(default_pool, 256);
+
+	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &box_status);
+	build->uid_validity = box_status.uidvalidity;
+
+	*last_uid_r = (uint32_t)-1;
+	*ctx_r = &build->ctx;
+	return 0;
+}
+
+/* Append size bytes of data to dest, replacing the XML markup characters
+   '&', '<' and '>' with entity references. Unlike xml_encode() the input
+   is length-delimited and does not have to be NUL-terminated. */
+static void
+xml_encode_data(string_t *dest, const unsigned char *data, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		switch (data[i]) {
+		case '&':
+			str_append(dest, "&amp;");
+			break;
+		case '<':
+			str_append(dest, "&lt;");
+			break;
+		case '>':
+			str_append(dest, "&gt;");
+			break;
+		default:
+			str_append_c(dest, data[i]);
+			break;
+		}
+	}
+}
+
+/* Feed one chunk of message text to the Solr update request.
+
+   uid     - UID of the message the chunk belongs to
+   data    - chunk bytes (not NUL-terminated), size bytes long
+   headers - TRUE if the chunk is header text, FALSE if it is body text
+
+   When uid differs from the previous call a new <doc> is started (closing
+   the previous one, or opening <add> for the very first document) with the
+   uid, uidv, box, user and id fields, followed by an open "hdr" or "body"
+   field. Within one message the body chunks must come before the header
+   chunks; the first header chunk closes the "body" field and opens "hdr".
+
+   The chunk is XML-escaped before it is posted, because it ends up as
+   character data inside the open <field> element.
+
+   Always returns 0. */
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+			    uint32_t uid, const unsigned char *data,
+			    size_t size, bool headers)
+{
+	struct solr_fts_backend_build_context *ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+	struct mailbox *box = _ctx->backend->box;
+	string_t *cmd = ctx->cmd;
+
+	str_truncate(cmd, 0);
+
+	/* body comes first, then headers */
+	if (ctx->prev_uid != uid) {
+		/* uid changed */
+		if (ctx->prev_uid == 0)
+			str_append(cmd, "<add>");
+		else
+			str_append(cmd, "</field></doc>");
+		ctx->prev_uid = uid;
+
+		str_printfa(cmd, "<doc>"
+			    "<field name=\"uid\">%u</field>"
+			    "<field name=\"uidv\">%u</field>",
+			    uid, ctx->uid_validity);
+
+		str_append(cmd, "<field name=\"box\">");
+		xml_encode(cmd, box->name);
+		str_append(cmd, "</field><field name=\"user\">");
+		xml_encode(cmd, box->storage->user);
+
+		/* unique document id: uid/uidvalidity/user/mailbox */
+		str_printfa(cmd, "</field><field name=\"id\">%u/%u/",
+			    uid, ctx->uid_validity);
+		xml_encode(cmd, box->storage->user);
+		str_append_c(cmd, '/');
+		xml_encode(cmd, box->name);
+		str_append(cmd, "</field>");
+
+		ctx->headers = headers;
+		if (headers) {
+			str_append(cmd, "<field name=\"hdr\">");
+		} else {
+			str_append(cmd, "<field name=\"body\">");
+		}
+	} else if (headers && !ctx->headers) {
+		/* first header chunk of this message: switch fields once.
+		   Remember the switch so that further header chunks are
+		   appended to the same "hdr" field and the ordering check
+		   below can catch body text arriving after headers. */
+		str_append(cmd, "</field><field name=\"hdr\">");
+		ctx->headers = headers;
+	} else {
+		/* same field as the previous chunk; body after headers is
+		   not allowed */
+		i_assert(!(!headers && ctx->headers));
+	}
+
+	/* the chunk becomes XML character data, so markup characters in the
+	   message must be escaped or they would corrupt the document */
+	xml_encode_data(cmd, data, size);
+	solr_connection_post_more(ctx->post, str_data(cmd), str_len(cmd));
+	return 0;
+}
+


More information about the dovecot-cvs mailing list