dovecot-2.1: fts-solr: Don't break when there are duplicate From/To/Subject/etc. fields.
dovecot at dovecot.org
dovecot at dovecot.org
Mon Sep 12 16:19:07 EEST 2011
details: http://hg.dovecot.org/dovecot-2.1/rev/600034b77a1c
changeset: 13453:600034b77a1c
user: Timo Sirainen <tss at iki.fi>
date: Mon Sep 12 16:18:56 2011 +0300
description:
fts-solr: Don't break when there are duplicate From/To/Subject/etc. fields.
diffstat:
src/plugins/fts-solr/fts-backend-solr.c | 101 +++++++++++++++++--------------
1 files changed, 56 insertions(+), 45 deletions(-)
diffs (189 lines):
diff -r 502b794f654b -r 600034b77a1c src/plugins/fts-solr/fts-backend-solr.c
--- a/src/plugins/fts-solr/fts-backend-solr.c Mon Sep 12 14:44:01 2011 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c Mon Sep 12 16:18:56 2011 +0300
@@ -22,6 +22,11 @@
struct fts_backend backend;
};
+struct solr_fts_field { /* one named field whose text is collected before being written to ctx->cmd */
+ char *key; /* field name; stored as str_lcase(i_strdup(name)) by fts_solr_field_get() */
+ string_t *value; /* text accumulated for this field; truncated to 0 after it is written out */
+};
+
struct solr_fts_backend_update_context {
struct fts_backend_update_context ctx;
@@ -30,14 +35,13 @@
struct solr_connection_post *post;
uint32_t prev_uid;
- string_t *cmd, *hdr, *hdr_fields;
+ string_t *cmd, *cur_value, *cur_value2;
+ ARRAY_DEFINE(fields, struct solr_fts_field);
uint32_t last_indexed_uid;
unsigned int last_indexed_uid_set:1;
- unsigned int headers_open:1;
unsigned int body_open:1;
- unsigned int cur_header_index:1;
unsigned int documents_added:1;
unsigned int expunges:1;
};
@@ -222,8 +226,7 @@
ctx = i_new(struct solr_fts_backend_update_context, 1);
ctx->ctx.backend = _backend;
ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
- ctx->hdr = str_new(default_pool, 4096);
- ctx->hdr_fields = str_new(default_pool, 1024);
+ i_array_init(&ctx->fields, 16);
return &ctx->ctx;
}
@@ -257,23 +260,39 @@
str_append(ctx->cmd, "</field>");
}
+static string_t *
+fts_solr_field_get(struct solr_fts_backend_update_context *ctx, const char *key)
+{ /* return the value buffer of the ctx->fields entry named `key`, adding a new entry if none matches */
+ const struct solr_fts_field *field;
+ struct solr_fts_field new_field;
+
+ /* linear scan: there are only a few fields, so this lookup is fast enough. */
+ array_foreach(&ctx->fields, field) {
+ if (strcasecmp(field->key, key) == 0) /* field names are compared case-insensitively */
+ return field->value;
+ }
+
+ memset(&new_field, 0, sizeof(new_field));
+ new_field.key = str_lcase(i_strdup(key)); /* private copy of the name, passed through str_lcase() */
+ new_field.value = str_new(default_pool, 128); /* 128 = initial size passed to str_new() */
+ array_append(&ctx->fields, &new_field, 1); /* key/value are released with i_free()/str_free() when ctx->fields is torn down */
+ return new_field.value;
+}
+
static void
fts_backend_solr_doc_close(struct solr_fts_backend_update_context *ctx)
{
- ctx->headers_open = FALSE;
+ struct solr_fts_field *field; /* cursor over ctx->fields */
+
if (ctx->body_open) {
ctx->body_open = FALSE;
str_append(ctx->cmd, "</field>");
}
- if (str_len(ctx->hdr) > 0) {
- str_append(ctx->cmd, "<field name=\"hdr\">");
- str_append_str(ctx->cmd, ctx->hdr);
+ array_foreach_modifiable(&ctx->fields, field) { /* NOTE(review): no str_len() > 0 check here, so empty values are emitted too - confirm intended */
+ str_printfa(ctx->cmd, "<field name=\"%s\">", field->key); /* one <field> element per collected key */
+ str_append_str(ctx->cmd, field->value); /* appended as-is; callers ran the text through xml_encode*() when collecting it */
str_append(ctx->cmd, "</field>");
- str_truncate(ctx->hdr, 0);
- }
- if (str_len(ctx->hdr_fields) > 0) {
- str_append_str(ctx->cmd, ctx->hdr_fields);
- str_truncate(ctx->hdr_fields, 0);
+ str_truncate(field->value, 0); /* empty the buffer; the entry itself stays in ctx->fields */
}
str_append(ctx->cmd, "</doc>");
}
@@ -297,6 +316,7 @@
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
+ struct solr_fts_field *field;
const char *str;
int ret = _ctx->failed ? -1 : 0;
@@ -314,8 +334,11 @@
}
str_free(&ctx->cmd);
- str_free(&ctx->hdr);
- str_free(&ctx->hdr_fields);
+ array_foreach_modifiable(&ctx->fields, field) {
+ str_free(&field->value);
+ i_free(field->key);
+ }
+ array_free(&ctx->fields);
i_free(ctx);
return ret;
}
@@ -409,22 +432,21 @@
switch (key->type) {
case FTS_BACKEND_BUILD_KEY_HDR:
if (fts_header_want_indexed(key->hdr_name)) {
- ctx->cur_header_index = TRUE;
- str_printfa(ctx->hdr_fields, "<field name=\"%s\">",
- t_str_lcase(key->hdr_name));
+ ctx->cur_value2 =
+ fts_solr_field_get(ctx, key->hdr_name);
}
/* fall through */
case FTS_BACKEND_BUILD_KEY_MIME_HDR:
- xml_encode(ctx->hdr, key->hdr_name);
- str_append(ctx->hdr, ": ");
- ctx->headers_open = TRUE;
+ ctx->cur_value = fts_solr_field_get(ctx, "hdr");
+ xml_encode(ctx->cur_value, key->hdr_name);
+ str_append(ctx->cur_value, ": ");
break;
case FTS_BACKEND_BUILD_KEY_BODY_PART:
- ctx->headers_open = FALSE;
if (!ctx->body_open) {
ctx->body_open = TRUE;
str_append(ctx->cmd, "<field name=\"body\">");
}
+ ctx->cur_value = ctx->cmd;
break;
case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
i_unreached();
@@ -438,20 +460,14 @@
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
- if (ctx->headers_open) {
- /* this is called individually for each header line.
- headers are finished only when key changes to body */
- str_append_c(ctx->hdr, '\n');
- } else {
- i_assert(ctx->body_open);
- /* messages can have multiple MIME bodies.
- add them all as one. */
- str_append_c(ctx->cmd, '\n');
- }
-
- if (ctx->cur_header_index) {
- str_append(ctx->hdr_fields, "</field>");
- ctx->cur_header_index = FALSE;
+ /* There can be multiple duplicate keys (duplicate header lines,
+ multiple MIME body parts). Make sure they are separated by
+ whitespace. */
+ str_append_c(ctx->cur_value, '\n');
+ ctx->cur_value = NULL;
+ if (ctx->cur_value2 != NULL) {
+ str_append_c(ctx->cur_value2, '\n');
+ ctx->cur_value2 = NULL;
}
}
@@ -465,14 +481,9 @@
if (_ctx->failed)
return -1;
- if (ctx->headers_open) {
- if (ctx->cur_header_index)
- xml_encode_data(ctx->hdr_fields, data, size);
- xml_encode_data(ctx->hdr, data, size);
- } else {
- i_assert(!ctx->cur_header_index);
- xml_encode_data(ctx->cmd, data, size);
- }
+ xml_encode_data(ctx->cur_value, data, size);
+ if (ctx->cur_value2 != NULL)
+ xml_encode_data(ctx->cur_value2, data, size);
if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) {
solr_connection_post_more(ctx->post, str_data(ctx->cmd),
More information about the dovecot-cvs
mailing list