dovecot-2.0: fts: Backends can now index non-text body parts if they support it.

dovecot at dovecot.org dovecot at dovecot.org
Mon May 17 19:07:04 EEST 2010


details:   http://hg.dovecot.org/dovecot-2.0/rev/757cb3148407
changeset: 11316:757cb3148407
user:      Timo Sirainen <tss at iki.fi>
date:      Mon May 17 18:06:57 2010 +0200
description:
fts: Backends can now index non-text body parts if they support it.

diffstat:

 src/plugins/fts-solr/fts-backend-solr.c   |  105 +++++++++++++++++++++++------------
 src/plugins/fts-squat/fts-backend-squat.c |   44 ++++++++++++--
 src/plugins/fts/fts-api-private.h         |   11 +++-
 src/plugins/fts/fts-api.c                 |   31 +++++++++-
 src/plugins/fts/fts-api.h                 |   60 ++++++++++++++-----
 src/plugins/fts/fts-storage.c             |  104 +++++++++++++++++++++++++++++-----
 6 files changed, 273 insertions(+), 82 deletions(-)

diffs (truncated from 558 to 300 lines):

diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c	Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Mon May 17 18:06:57 2010 +0200
@@ -561,48 +561,78 @@
 	xml_encode(str, backend->id_box_name);
 }
 
-static int
-fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
-			    uint32_t uid, const unsigned char *data,
-			    size_t size, bool headers)
+static void
+fts_backend_solr_uid_changed(struct solr_fts_backend_build_context *ctx,
+			     uint32_t uid)
+{
+	if (ctx->post == NULL) {
+		ctx->post = solr_connection_post_begin(solr_conn);
+		str_append(ctx->cmd, "<add>");
+	} else {
+		str_append(ctx->cmd, "</field></doc>");
+	}
+	ctx->prev_uid = uid;
+	ctx->headers = FALSE;
+
+	fts_backend_solr_add_doc_prefix(ctx, uid);
+	str_printfa(ctx->cmd, "<field name=\"id\">");
+	xml_encode_id(ctx->cmd, ctx->ctx.backend, uid, ctx->uid_validity);
+	str_append(ctx->cmd, "</field>");
+}
+
+static void
+fts_backend_solr_build_hdr(struct fts_backend_build_context *_ctx,
+			   uint32_t uid)
 {
 	struct solr_fts_backend_build_context *ctx =
 		(struct solr_fts_backend_build_context *)_ctx;
-	string_t *cmd = ctx->cmd;
 
-	/* body comes first, then headers */
-	if (ctx->prev_uid != uid) {
-		/* uid changed */
-		if (ctx->post == NULL) {
-			ctx->post = solr_connection_post_begin(solr_conn);
-			str_append(cmd, "<add>");
-		} else {
-			str_append(cmd, "</field></doc>");
-		}
-		ctx->prev_uid = uid;
-
-		fts_backend_solr_add_doc_prefix(ctx, uid);
-		str_printfa(cmd, "<field name=\"id\">");
-		xml_encode_id(cmd, _ctx->backend, uid, ctx->uid_validity);
-		str_append(cmd, "</field>");
-
-		ctx->headers = headers;
-		if (headers) {
-			str_append(cmd, "<field name=\"hdr\">");
-		} else {
-			str_append(cmd, "<field name=\"body\">");
-		}
-	} else if (headers && !ctx->headers) {
-		str_append(cmd, "</field><field name=\"hdr\">");
-	} else {
-		i_assert(!(!headers && ctx->headers));
+	if (uid != ctx->prev_uid)
+		fts_backend_solr_uid_changed(ctx, uid);
+	else {
+		i_assert(!ctx->headers);
+		str_append(ctx->cmd, "</field>");
 	}
 
-	xml_encode_data(cmd, data, size);
-	if (str_len(cmd) > SOLR_CMDBUF_SIZE-128) {
-		solr_connection_post_more(ctx->post, str_data(cmd),
-					  str_len(cmd));
-		str_truncate(cmd, 0);
+	ctx->headers = TRUE;
+	str_append(ctx->cmd, "<field name=\"hdr\">");
+}
+
+static bool
+fts_backend_solr_build_body_begin(struct fts_backend_build_context *_ctx,
+				  uint32_t uid, const char *content_type,
+				  const char *content_disposition ATTR_UNUSED)
+{
+	struct solr_fts_backend_build_context *ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+
+	if (!fts_backend_default_can_index(content_type))
+		return FALSE;
+
+	if (uid != ctx->prev_uid)
+		fts_backend_solr_uid_changed(ctx, uid);
+	else {
+		/* body comes first, then headers */
+		i_assert(!ctx->headers);
+	}
+
+	ctx->headers = FALSE;
+	str_append(ctx->cmd, "<field name=\"body\">");
+	return TRUE;
+}
+
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+			    const unsigned char *data, size_t size)
+{
+	struct solr_fts_backend_build_context *ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+
+	xml_encode_data(ctx->cmd, data, size);
+	if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) {
+		solr_connection_post_more(ctx->post, str_data(ctx->cmd),
+					  str_len(ctx->cmd));
+		str_truncate(ctx->cmd, 0);
 	}
 	return 0;
 }
@@ -806,6 +836,9 @@
 		fts_backend_solr_get_last_uid,
 		fts_backend_solr_get_all_last_uids,
 		fts_backend_solr_build_init,
+		fts_backend_solr_build_hdr,
+		fts_backend_solr_build_body_begin,
+		NULL,
 		fts_backend_solr_build_more,
 		fts_backend_solr_build_deinit,
 		fts_backend_solr_expunge,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts-squat/fts-backend-squat.c
--- a/src/plugins/fts-squat/fts-backend-squat.c	Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Mon May 17 18:06:57 2010 +0200
@@ -21,6 +21,8 @@
 struct squat_fts_backend_build_context {
 	struct fts_backend_build_context ctx;
 	struct squat_trie_build_context *build_ctx;
+	enum squat_index_type squat_type;
+	uint32_t uid;
 };
 
 static void
@@ -127,18 +129,41 @@
 	return 0;
 }
 
-static int
-fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
-			     uint32_t uid, const unsigned char *data,
-			     size_t size, bool headers)
+static void
+fts_backend_squat_build_hdr(struct fts_backend_build_context *_ctx,
+			    uint32_t uid)
 {
 	struct squat_fts_backend_build_context *ctx =
 		(struct squat_fts_backend_build_context *)_ctx;
-	enum squat_index_type squat_type;
 
-	squat_type = headers ? SQUAT_INDEX_TYPE_HEADER :
-		SQUAT_INDEX_TYPE_BODY;
-	return squat_trie_build_more(ctx->build_ctx, uid, squat_type,
+	ctx->squat_type = SQUAT_INDEX_TYPE_HEADER;
+	ctx->uid = uid;
+}
+
+static bool
+fts_backend_squat_build_body_begin(struct fts_backend_build_context *_ctx,
+				   uint32_t uid, const char *content_type,
+				   const char *content_disposition ATTR_UNUSED)
+{
+	struct squat_fts_backend_build_context *ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
+
+	if (!fts_backend_default_can_index(content_type))
+		return FALSE;
+
+	ctx->squat_type = SQUAT_INDEX_TYPE_BODY;
+	ctx->uid = uid;
+	return TRUE;
+}
+
+static int
+fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
+			     const unsigned char *data, size_t size)
+{
+	struct squat_fts_backend_build_context *ctx =
+		(struct squat_fts_backend_build_context *)_ctx;
+
+	return squat_trie_build_more(ctx->build_ctx, ctx->uid, ctx->squat_type,
 				     data, size);
 }
 
@@ -248,6 +273,9 @@
 		fts_backend_squat_get_last_uid,
 		NULL,
 		fts_backend_squat_build_init,
+		fts_backend_squat_build_hdr,
+		fts_backend_squat_build_body_begin,
+		NULL,
 		fts_backend_squat_build_more,
 		fts_backend_squat_build_deinit,
 		fts_backend_squat_expunge,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api-private.h
--- a/src/plugins/fts/fts-api-private.h	Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api-private.h	Mon May 17 18:06:57 2010 +0200
@@ -13,8 +13,13 @@
 
 	int (*build_init)(struct fts_backend *backend, uint32_t *last_uid_r,
 			  struct fts_backend_build_context **ctx_r);
-	int (*build_more)(struct fts_backend_build_context *ctx, uint32_t uid,
-			  const unsigned char *data, size_t size, bool headers);
+	void (*build_hdr)(struct fts_backend_build_context *ctx, uint32_t uid);
+	bool (*build_body_begin)(struct fts_backend_build_context *ctx,
+				 uint32_t uid, const char *content_type,
+				 const char *content_disposition);
+	void (*build_body_end)(struct fts_backend_build_context *ctx);
+	int (*build_more)(struct fts_backend_build_context *ctx,
+			  const unsigned char *data, size_t size);
 	int (*build_deinit)(struct fts_backend_build_context *ctx);
 
 	void (*expunge)(struct fts_backend *backend, struct mail *mail);
@@ -80,6 +85,8 @@
 void fts_backend_register(const struct fts_backend *backend);
 void fts_backend_unregister(const char *name);
 
+bool fts_backend_default_can_index(const char *content_type);
+
 void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
 		     const ARRAY_TYPE(seq_range) *definite_filter,
 		     ARRAY_TYPE(seq_range) *maybe_dest,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api.c
--- a/src/plugins/fts/fts-api.c	Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api.c	Mon May 17 18:06:57 2010 +0200
@@ -99,10 +99,29 @@
 	return ret;
 }
 
-int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
-			   const unsigned char *data, size_t size, bool headers)
+void fts_backend_build_hdr(struct fts_backend_build_context *ctx, uint32_t uid)
 {
-	return ctx->backend->v.build_more(ctx, uid, data, size, headers);
+	ctx->backend->v.build_hdr(ctx, uid);
+}
+
+bool fts_backend_build_body_begin(struct fts_backend_build_context *ctx,
+				  uint32_t uid, const char *content_type,
+				  const char *content_disposition)
+{
+	return ctx->backend->v.build_body_begin(ctx, uid, content_type,
+						content_disposition);
+}
+
+void fts_backend_build_body_end(struct fts_backend_build_context *ctx)
+{
+	if (ctx->backend->v.build_body_end != NULL)
+		ctx->backend->v.build_body_end(ctx);
+}
+
+int fts_backend_build_more(struct fts_backend_build_context *ctx,
+			   const unsigned char *data, size_t size)
+{
+	return ctx->backend->v.build_more(ctx, data, size);
 }
 
 int fts_backend_build_deinit(struct fts_backend_build_context **_ctx)
@@ -321,3 +340,9 @@
 	pool_unref(&ctx->pool);
 	return ret;
 }
+
+bool fts_backend_default_can_index(const char *content_type)
+{
+	return strncasecmp(content_type, "text/", 5) == 0 ||
+		strcasecmp(content_type, "message/rfc822") == 0;
+}
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api.h
--- a/src/plugins/fts/fts-api.h	Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api.h	Mon May 17 18:06:57 2010 +0200
@@ -8,8 +8,12 @@
 #include "seq-range-array.h"
 
 enum fts_lookup_flags {
+	/* Search within header and/or body.
+	   At least one of these must be set. */
 	FTS_LOOKUP_FLAG_HEADER	= 0x01,
 	FTS_LOOKUP_FLAG_BODY	= 0x02,
+
+	/* The key must NOT be found */
 	FTS_LOOKUP_FLAG_INVERT	= 0x04
 };
 
@@ -33,23 +37,42 @@
 /* Get the last_uid for the mailbox. */
 int fts_backend_get_last_uid(struct fts_backend *backend, uint32_t *last_uid_r);
 /* Get last_uids for all mailboxes that might be backend mailboxes for a
-   virtual mailbox. Depending on virtual mailbox configuration, this function
-   may also return mailboxes that don't really even match the virtual mailbox
-   patterns. The caller should filter out the list itself. */
+   virtual mailbox. The backend can use mailbox_get_virtual_backend_boxes() or
+   mailbox_get_virtual_box_patterns() functions to get the list of mailboxes.
+


More information about the dovecot-cvs mailing list