dovecot-2.1: fts: Added FTS_BACKEND_FLAG_BUILD_SHORT_UTF8 to require sending only short UTF8 data to backend.
dovecot at dovecot.org
dovecot at dovecot.org
Tue Nov 27 03:49:46 EET 2012
details: http://hg.dovecot.org/dovecot-2.1/rev/01550514f189
changeset: 14807:01550514f189
user: Timo Sirainen <tss at iki.fi>
date: Tue Nov 27 03:49:25 2012 +0200
description:
fts: Added FTS_BACKEND_FLAG_BUILD_SHORT_UTF8 to require sending only short UTF8 data to backend.
diffstat:
src/plugins/fts/fts-api-private.h | 4 +++-
src/plugins/fts/fts-build-mail.c | 7 +++++--
src/plugins/fts/fts-parser.c | 24 ++++++++++++++++++------
src/plugins/fts/fts-parser.h | 3 ++-
4 files changed, 28 insertions(+), 10 deletions(-)
diffs (116 lines):
diff -r 172295f5a78b -r 01550514f189 src/plugins/fts/fts-api-private.h
--- a/src/plugins/fts/fts-api-private.h Tue Nov 27 03:48:15 2012 +0200
+++ b/src/plugins/fts/fts-api-private.h Tue Nov 27 03:49:25 2012 +0200
@@ -59,7 +59,9 @@
/* Send only fully indexable words rather than randomly sized blocks */
FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04,
/* Fuzzy search works */
- FTS_BACKEND_FLAG_FUZZY_SEARCH = 0x08
+ FTS_BACKEND_FLAG_FUZZY_SEARCH = 0x08,
+ /* Don't allow 5-byte or 6-byte UTF8 sequences */
+ FTS_BACKEND_FLAG_BUILD_SHORT_UTF8 = 0x10
};
struct fts_backend {
diff -r 172295f5a78b -r 01550514f189 src/plugins/fts/fts-build-mail.c
--- a/src/plugins/fts/fts-build-mail.c Tue Nov 27 03:48:15 2012 +0200
+++ b/src/plugins/fts/fts-build-mail.c Tue Nov 27 03:49:25 2012 +0200
@@ -144,6 +144,7 @@
struct mail_storage *storage;
const char *content_type;
struct fts_backend_build_key key;
+ bool require_short_utf8;
i_assert(ctx->body_parser == NULL);
@@ -158,9 +159,11 @@
return FALSE;
}
-
+ require_short_utf8 = (ctx->update_ctx->backend->flags &
+ FTS_BACKEND_FLAG_BUILD_SHORT_UTF8) != 0;
+
storage = mailbox_get_storage(ctx->mail->box);
- if (fts_parser_init(mail_storage_get_user(storage),
+ if (fts_parser_init(mail_storage_get_user(storage), require_short_utf8,
content_type, ctx->content_disposition,
&ctx->body_parser)) {
/* extract text using the the returned parser */
diff -r 172295f5a78b -r 01550514f189 src/plugins/fts/fts-parser.c
--- a/src/plugins/fts/fts-parser.c Tue Nov 27 03:48:15 2012 +0200
+++ b/src/plugins/fts/fts-parser.c Tue Nov 27 03:49:25 2012 +0200
@@ -11,7 +11,7 @@
&fts_parser_script
};
-bool fts_parser_init(struct mail_user *user,
+bool fts_parser_init(struct mail_user *user, bool require_short_utf8,
const char *content_type, const char *content_disposition,
struct fts_parser **parser_r)
{
@@ -20,8 +20,10 @@
for (i = 0; i < N_ELEMENTS(parsers); i++) {
*parser_r = parsers[i]->try_init(user, content_type,
content_disposition);
- if (*parser_r != NULL)
+ if (*parser_r != NULL) {
+ (*parser_r)->require_short_utf8 = require_short_utf8;
return TRUE;
+ }
}
return FALSE;
}
@@ -56,11 +58,15 @@
void fts_parser_more(struct fts_parser *parser, struct message_block *block)
{
+ bool valid_utf8;
+
if (parser->v.more != NULL)
parser->v.more(parser, block);
- if (!uni_utf8_data_is_valid(block->data, block->size) ||
- data_has_nuls(block->data, block->size)) {
+ valid_utf8 = parser->require_short_utf8 ?
+ uni_utf8_short_data_is_valid(block->data, block->size) :
+ uni_utf8_data_is_valid(block->data, block->size);
+ if (!valid_utf8 || data_has_nuls(block->data, block->size)) {
/* output isn't valid UTF-8. make it. */
if (parser->utf8_output == NULL) {
parser->utf8_output =
@@ -68,8 +74,14 @@
} else {
buffer_set_used_size(parser->utf8_output, 0);
}
- (void)uni_utf8_get_valid_data(block->data, block->size,
- parser->utf8_output);
+ if (parser->require_short_utf8) {
+ (void)uni_utf8_short_get_valid_data(block->data,
+ block->size,
+ parser->utf8_output);
+ } else {
+ (void)uni_utf8_get_valid_data(block->data, block->size,
+ parser->utf8_output);
+ }
replace_nul_bytes(parser->utf8_output);
block->data = parser->utf8_output->data;
block->size = parser->utf8_output->used;
diff -r 172295f5a78b -r 01550514f189 src/plugins/fts/fts-parser.h
--- a/src/plugins/fts/fts-parser.h Tue Nov 27 03:48:15 2012 +0200
+++ b/src/plugins/fts/fts-parser.h Tue Nov 27 03:49:25 2012 +0200
@@ -15,12 +15,13 @@
struct fts_parser {
struct fts_parser_vfuncs v;
buffer_t *utf8_output;
+ bool require_short_utf8;
};
extern struct fts_parser_vfuncs fts_parser_html;
extern struct fts_parser_vfuncs fts_parser_script;
-bool fts_parser_init(struct mail_user *user,
+bool fts_parser_init(struct mail_user *user, bool require_short_utf8,
const char *content_type, const char *content_disposition,
struct fts_parser **parser_r);
struct fts_parser *fts_parser_text_init(void);
More information about the dovecot-cvs
mailing list