dovecot-2.0: fts: Backends can now index non-text body parts if they support it.
dovecot at dovecot.org
dovecot at dovecot.org
Mon May 17 19:07:04 EEST 2010
details: http://hg.dovecot.org/dovecot-2.0/rev/757cb3148407
changeset: 11316:757cb3148407
user: Timo Sirainen <tss at iki.fi>
date: Mon May 17 18:06:57 2010 +0200
description:
fts: Backends can now index non-text body parts if they support it.
diffstat:
src/plugins/fts-solr/fts-backend-solr.c | 105 +++++++++++++++++++++++------------
src/plugins/fts-squat/fts-backend-squat.c | 44 ++++++++++++--
src/plugins/fts/fts-api-private.h | 11 +++-
src/plugins/fts/fts-api.c | 31 +++++++++-
src/plugins/fts/fts-api.h | 60 ++++++++++++++-----
src/plugins/fts/fts-storage.c | 104 +++++++++++++++++++++++++++++-----
6 files changed, 273 insertions(+), 82 deletions(-)
diffs (truncated from 558 to 300 lines):
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c Mon May 17 18:06:57 2010 +0200
@@ -561,48 +561,78 @@
xml_encode(str, backend->id_box_name);
}
-static int
-fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
- uint32_t uid, const unsigned char *data,
- size_t size, bool headers)
+static void
+fts_backend_solr_uid_changed(struct solr_fts_backend_build_context *ctx,
+ uint32_t uid)
+{
+ if (ctx->post == NULL) {
+ ctx->post = solr_connection_post_begin(solr_conn);
+ str_append(ctx->cmd, "<add>");
+ } else {
+ str_append(ctx->cmd, "</field></doc>");
+ }
+ ctx->prev_uid = uid;
+ ctx->headers = FALSE;
+
+ fts_backend_solr_add_doc_prefix(ctx, uid);
+ str_printfa(ctx->cmd, "<field name=\"id\">");
+ xml_encode_id(ctx->cmd, ctx->ctx.backend, uid, ctx->uid_validity);
+ str_append(ctx->cmd, "</field>");
+}
+
+static void
+fts_backend_solr_build_hdr(struct fts_backend_build_context *_ctx,
+ uint32_t uid)
{
struct solr_fts_backend_build_context *ctx =
(struct solr_fts_backend_build_context *)_ctx;
- string_t *cmd = ctx->cmd;
- /* body comes first, then headers */
- if (ctx->prev_uid != uid) {
- /* uid changed */
- if (ctx->post == NULL) {
- ctx->post = solr_connection_post_begin(solr_conn);
- str_append(cmd, "<add>");
- } else {
- str_append(cmd, "</field></doc>");
- }
- ctx->prev_uid = uid;
-
- fts_backend_solr_add_doc_prefix(ctx, uid);
- str_printfa(cmd, "<field name=\"id\">");
- xml_encode_id(cmd, _ctx->backend, uid, ctx->uid_validity);
- str_append(cmd, "</field>");
-
- ctx->headers = headers;
- if (headers) {
- str_append(cmd, "<field name=\"hdr\">");
- } else {
- str_append(cmd, "<field name=\"body\">");
- }
- } else if (headers && !ctx->headers) {
- str_append(cmd, "</field><field name=\"hdr\">");
- } else {
- i_assert(!(!headers && ctx->headers));
+ if (uid != ctx->prev_uid)
+ fts_backend_solr_uid_changed(ctx, uid);
+ else {
+ i_assert(!ctx->headers);
+ str_append(ctx->cmd, "</field>");
}
- xml_encode_data(cmd, data, size);
- if (str_len(cmd) > SOLR_CMDBUF_SIZE-128) {
- solr_connection_post_more(ctx->post, str_data(cmd),
- str_len(cmd));
- str_truncate(cmd, 0);
+ ctx->headers = TRUE;
+ str_append(ctx->cmd, "<field name=\"hdr\">");
+}
+
+static bool
+fts_backend_solr_build_body_begin(struct fts_backend_build_context *_ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition ATTR_UNUSED)
+{
+ struct solr_fts_backend_build_context *ctx =
+ (struct solr_fts_backend_build_context *)_ctx;
+
+ if (!fts_backend_default_can_index(content_type))
+ return FALSE;
+
+ if (uid != ctx->prev_uid)
+ fts_backend_solr_uid_changed(ctx, uid);
+ else {
+ /* body comes first, then headers */
+ i_assert(!ctx->headers);
+ }
+
+ ctx->headers = FALSE;
+ str_append(ctx->cmd, "<field name=\"body\">");
+ return TRUE;
+}
+
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+ const unsigned char *data, size_t size)
+{
+ struct solr_fts_backend_build_context *ctx =
+ (struct solr_fts_backend_build_context *)_ctx;
+
+ xml_encode_data(ctx->cmd, data, size);
+ if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) {
+ solr_connection_post_more(ctx->post, str_data(ctx->cmd),
+ str_len(ctx->cmd));
+ str_truncate(ctx->cmd, 0);
}
return 0;
}
@@ -806,6 +836,9 @@
fts_backend_solr_get_last_uid,
fts_backend_solr_get_all_last_uids,
fts_backend_solr_build_init,
+ fts_backend_solr_build_hdr,
+ fts_backend_solr_build_body_begin,
+ NULL,
fts_backend_solr_build_more,
fts_backend_solr_build_deinit,
fts_backend_solr_expunge,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts-squat/fts-backend-squat.c
--- a/src/plugins/fts-squat/fts-backend-squat.c Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts-squat/fts-backend-squat.c Mon May 17 18:06:57 2010 +0200
@@ -21,6 +21,8 @@
struct squat_fts_backend_build_context {
struct fts_backend_build_context ctx;
struct squat_trie_build_context *build_ctx;
+ enum squat_index_type squat_type;
+ uint32_t uid;
};
static void
@@ -127,18 +129,41 @@
return 0;
}
-static int
-fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
- uint32_t uid, const unsigned char *data,
- size_t size, bool headers)
+static void
+fts_backend_squat_build_hdr(struct fts_backend_build_context *_ctx,
+ uint32_t uid)
{
struct squat_fts_backend_build_context *ctx =
(struct squat_fts_backend_build_context *)_ctx;
- enum squat_index_type squat_type;
- squat_type = headers ? SQUAT_INDEX_TYPE_HEADER :
- SQUAT_INDEX_TYPE_BODY;
- return squat_trie_build_more(ctx->build_ctx, uid, squat_type,
+ ctx->squat_type = SQUAT_INDEX_TYPE_HEADER;
+ ctx->uid = uid;
+}
+
+static bool
+fts_backend_squat_build_body_begin(struct fts_backend_build_context *_ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition ATTR_UNUSED)
+{
+ struct squat_fts_backend_build_context *ctx =
+ (struct squat_fts_backend_build_context *)_ctx;
+
+ if (!fts_backend_default_can_index(content_type))
+ return FALSE;
+
+ ctx->squat_type = SQUAT_INDEX_TYPE_BODY;
+ ctx->uid = uid;
+ return TRUE;
+}
+
+static int
+fts_backend_squat_build_more(struct fts_backend_build_context *_ctx,
+ const unsigned char *data, size_t size)
+{
+ struct squat_fts_backend_build_context *ctx =
+ (struct squat_fts_backend_build_context *)_ctx;
+
+ return squat_trie_build_more(ctx->build_ctx, ctx->uid, ctx->squat_type,
data, size);
}
@@ -248,6 +273,9 @@
fts_backend_squat_get_last_uid,
NULL,
fts_backend_squat_build_init,
+ fts_backend_squat_build_hdr,
+ fts_backend_squat_build_body_begin,
+ NULL,
fts_backend_squat_build_more,
fts_backend_squat_build_deinit,
fts_backend_squat_expunge,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api-private.h
--- a/src/plugins/fts/fts-api-private.h Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api-private.h Mon May 17 18:06:57 2010 +0200
@@ -13,8 +13,13 @@
int (*build_init)(struct fts_backend *backend, uint32_t *last_uid_r,
struct fts_backend_build_context **ctx_r);
- int (*build_more)(struct fts_backend_build_context *ctx, uint32_t uid,
- const unsigned char *data, size_t size, bool headers);
+ void (*build_hdr)(struct fts_backend_build_context *ctx, uint32_t uid);
+ bool (*build_body_begin)(struct fts_backend_build_context *ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition);
+ void (*build_body_end)(struct fts_backend_build_context *ctx);
+ int (*build_more)(struct fts_backend_build_context *ctx,
+ const unsigned char *data, size_t size);
int (*build_deinit)(struct fts_backend_build_context *ctx);
void (*expunge)(struct fts_backend *backend, struct mail *mail);
@@ -80,6 +85,8 @@
void fts_backend_register(const struct fts_backend *backend);
void fts_backend_unregister(const char *name);
+bool fts_backend_default_can_index(const char *content_type);
+
void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
const ARRAY_TYPE(seq_range) *definite_filter,
ARRAY_TYPE(seq_range) *maybe_dest,
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api.c
--- a/src/plugins/fts/fts-api.c Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api.c Mon May 17 18:06:57 2010 +0200
@@ -99,10 +99,29 @@
return ret;
}
-int fts_backend_build_more(struct fts_backend_build_context *ctx, uint32_t uid,
- const unsigned char *data, size_t size, bool headers)
+void fts_backend_build_hdr(struct fts_backend_build_context *ctx, uint32_t uid)
{
- return ctx->backend->v.build_more(ctx, uid, data, size, headers);
+ ctx->backend->v.build_hdr(ctx, uid);
+}
+
+bool fts_backend_build_body_begin(struct fts_backend_build_context *ctx,
+ uint32_t uid, const char *content_type,
+ const char *content_disposition)
+{
+ return ctx->backend->v.build_body_begin(ctx, uid, content_type,
+ content_disposition);
+}
+
+void fts_backend_build_body_end(struct fts_backend_build_context *ctx)
+{
+ if (ctx->backend->v.build_body_end != NULL)
+ ctx->backend->v.build_body_end(ctx);
+}
+
+int fts_backend_build_more(struct fts_backend_build_context *ctx,
+ const unsigned char *data, size_t size)
+{
+ return ctx->backend->v.build_more(ctx, data, size);
}
int fts_backend_build_deinit(struct fts_backend_build_context **_ctx)
@@ -321,3 +340,9 @@
pool_unref(&ctx->pool);
return ret;
}
+
+bool fts_backend_default_can_index(const char *content_type)
+{
+ return strncasecmp(content_type, "text/", 5) == 0 ||
+ strcasecmp(content_type, "message/rfc822") == 0;
+}
diff -r 7bb35ad5e80e -r 757cb3148407 src/plugins/fts/fts-api.h
--- a/src/plugins/fts/fts-api.h Fri May 14 17:41:34 2010 +0200
+++ b/src/plugins/fts/fts-api.h Mon May 17 18:06:57 2010 +0200
@@ -8,8 +8,12 @@
#include "seq-range-array.h"
enum fts_lookup_flags {
+ /* Search within header and/or body.
+ At least one of these must be set. */
FTS_LOOKUP_FLAG_HEADER = 0x01,
FTS_LOOKUP_FLAG_BODY = 0x02,
+
+ /* The key must NOT be found */
FTS_LOOKUP_FLAG_INVERT = 0x04
};
@@ -33,23 +37,42 @@
/* Get the last_uid for the mailbox. */
int fts_backend_get_last_uid(struct fts_backend *backend, uint32_t *last_uid_r);
/* Get last_uids for all mailboxes that might be backend mailboxes for a
- virtual mailbox. Depending on virtual mailbox configuration, this function
- may also return mailboxes that don't really even match the virtual mailbox
- patterns. The caller should filter out the list itself. */
+ virtual mailbox. The backend can use mailbox_get_virtual_backend_boxes() or
+ mailbox_get_virtual_box_patterns() functions to get the list of mailboxes.
+
More information about the dovecot-cvs
mailing list