dovecot-2.1: fts: Added FTS_BACKEND_FLAG_BUILD_FULL_WORDS for sending data to backends only in full words.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Nov 5 17:48:50 EET 2011
details: http://hg.dovecot.org/dovecot-2.1/rev/f4a95383ea8a
changeset: 13660:f4a95383ea8a
user: Timo Sirainen <tss at iki.fi>
date: Sat Nov 05 17:58:50 2011 +0200
description:
fts: Added FTS_BACKEND_FLAG_BUILD_FULL_WORDS for sending data to backends only in full words.
diffstat:
src/plugins/fts/fts-api-private.h | 4 +-
src/plugins/fts/fts-build-mail.c | 82 +++++++++++++++++++++++++++++++++++---
2 files changed, 78 insertions(+), 8 deletions(-)
diffs (153 lines):
diff -r efe369e2885d -r f4a95383ea8a src/plugins/fts/fts-api-private.h
--- a/src/plugins/fts/fts-api-private.h Sat Nov 05 17:31:47 2011 +0200
+++ b/src/plugins/fts/fts-api-private.h Sat Nov 05 17:58:50 2011 +0200
@@ -55,7 +55,9 @@
FTS_BACKEND_FLAG_BINARY_MIME_PARTS = 0x01,
/* Send built text to backend as decomposed titlecase rather than
preserving original case */
- FTS_BACKEND_FLAG_BUILD_DTCASE = 0x02
+ FTS_BACKEND_FLAG_BUILD_DTCASE = 0x02,
+ /* Send only fully indexable words rather than randomly sized blocks */
+ FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04
};
struct fts_backend {
diff -r efe369e2885d -r f4a95383ea8a src/plugins/fts/fts-build-mail.c
--- a/src/plugins/fts/fts-build-mail.c Sat Nov 05 17:31:47 2011 +0200
+++ b/src/plugins/fts/fts-build-mail.c Sat Nov 05 17:58:50 2011 +0200
@@ -2,6 +2,7 @@
#include "lib.h"
#include "istream.h"
+#include "buffer.h"
#include "str.h"
#include "rfc822-parser.h"
#include "message-address.h"
@@ -12,12 +13,21 @@
#include "fts-api-private.h"
#include "fts-build-mail.h"
+/* there are other characters as well, but this doesn't have to be exact */
+#define IS_WORD_WHITESPACE(c) \
+ ((c) == ' ' || (c) == '\t' || (c) == '\n')
+/* if we see a word larger than this, just go ahead and split it from
+ wherever */
+#define MAX_WORD_SIZE 1024
+
struct fts_mail_build_context {
struct mail *mail;
struct fts_backend_update_context *update_ctx;
char *content_type, *content_disposition;
struct fts_parser *body_parser;
+
+ buffer_t *word_buf; /* unfinished trailing word carried over between body blocks when the backend wants full words; NULL until first needed */
};
static void fts_build_parse_content_type(struct fts_mail_build_context *ctx,
@@ -175,6 +185,65 @@
return fts_backend_update_set_build_key(ctx->update_ctx, &key);
}
+static int fts_build_body_block(struct fts_mail_build_context *ctx,
+ struct message_block *block, bool last)
+{ /* Feed one body block to the backend. If the backend does not set
+ FTS_BACKEND_FLAG_BUILD_FULL_WORDS the block is passed through unchanged.
+ Otherwise only the data up to the last whitespace is sent now and the
+ remainder is held in ctx->word_buf until a later call completes the word.
+ last=TRUE sends everything that is still pending. Returns -1 when a
+ backend call fails and 0 otherwise (the pass-through path returns the
+ backend's result directly). */
+ unsigned int i;
+
+ i_assert(block->hdr == NULL); /* callers must not pass a block that has a header attached */
+
+ if ((ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_BUILD_FULL_WORDS) == 0) { /* backend accepts arbitrary blocks */
+ return fts_backend_update_build_more(ctx->update_ctx,
+ block->data, block->size);
+ }
+ /* we'll need to send only full words to the backend. A "word" here is
+ simply a run of bytes without IS_WORD_WHITESPACE characters. */
+
+ if (ctx->word_buf != NULL && ctx->word_buf->used > 0) {
+ /* continuing previous word: take the bytes up to the first
+ whitespace in this block and append them to the buffered word */
+ for (i = 0; i < block->size; i++) {
+ if (IS_WORD_WHITESPACE(block->data[i]))
+ break;
+ }
+ buffer_append(ctx->word_buf, block->data, i);
+ block->data += i; /* note: this advances the caller's block in place */
+ block->size -= i;
+ if (block->size == 0 && ctx->word_buf->used < MAX_WORD_SIZE &&
+ !last) {
+ /* word is still not finished: the whole block was consumed
+ without reaching whitespace, the word is still below
+ MAX_WORD_SIZE and this is not the last block */
+ return 0;
+ }
+ /* we have a full word, index it (also reached when the word has
+ grown to MAX_WORD_SIZE or when last=TRUE) */
+ if (fts_backend_update_build_more(ctx->update_ctx,
+ ctx->word_buf->data,
+ ctx->word_buf->used) < 0)
+ return -1;
+ buffer_set_used_size(ctx->word_buf, 0); /* buffer is kept for reuse, only emptied */
+ }
+
+ /* find the boundary for last word: i ends up as the offset just past
+ the last whitespace in the block, or 0 if there is no whitespace.
+ With last=TRUE nothing is held back. */
+ if (last)
+ i = block->size;
+ else {
+ for (i = block->size; i > 0; i--) {
+ if (IS_WORD_WHITESPACE(block->data[i-1]))
+ break;
+ }
+ }
+
+ if (fts_backend_update_build_more(ctx->update_ctx, block->data, i) < 0)
+ return -1;
+
+ if (i < block->size) { /* keep the unfinished trailing word for the next call */
+ if (ctx->word_buf == NULL) { /* created lazily on first use */
+ ctx->word_buf =
+ buffer_create_dynamic(default_pool, 128);
+ }
+ buffer_append(ctx->word_buf, block->data + i, block->size - i); /* NOTE(review): this is only sent by a later call; confirm some caller always ends with last=TRUE */
+ }
+ return 0;
+}
+
static int fts_body_parser_finish(struct fts_mail_build_context *ctx)
{
struct message_block block;
@@ -183,9 +252,7 @@
do {
memset(&block, 0, sizeof(block));
fts_parser_more(ctx->body_parser, &block);
- if (fts_backend_update_build_more(ctx->update_ctx,
- block.data,
- block.size) < 0) {
+ if (fts_build_body_block(ctx, &block, FALSE) < 0) {
ret = -1;
break;
}
@@ -282,9 +349,7 @@
i_assert(body_part);
if (ctx.body_parser != NULL)
fts_parser_more(ctx.body_parser, &block);
- if (fts_backend_update_build_more(update_ctx,
- block.data,
- block.size) < 0) {
+ if (fts_build_body_block(&ctx, &block, FALSE) < 0) {
ret = -1;
break;
}
@@ -295,13 +360,16 @@
ret = fts_body_parser_finish(&ctx);
if (ret == 0 && body_part && !skip_body && !body_added) {
/* make sure body is added even when it doesn't exist */
- ret = fts_backend_update_build_more(update_ctx, NULL, 0);
+ block.data = NULL; block.size = 0;
+ ret = fts_build_body_block(&ctx, &block, TRUE);
}
if (message_parser_deinit(&parser, &parts) < 0)
mail_set_cache_corrupted(mail, MAIL_FETCH_MESSAGE_PARTS);
message_decoder_deinit(&decoder);
i_free(ctx.content_type);
i_free(ctx.content_disposition);
+ if (ctx.word_buf != NULL)
+ buffer_free(&ctx.word_buf);
return ret < 0 ? -1 : 1;
}
More information about the dovecot-cvs
mailing list