FTS: optional message size limits for indexing, Tika URL credentials,
and private HTTP clients for Tika and Solr.

fts-build-mail.c
  Reads two optional plugin settings, parsed with settings_get_size():
    fts_max_size       - mails larger than this skip message body indexing
    fts_max_size_tika  - mails larger than this skip the Tika parser
  An unset, zero or unparsable value means "no limit". The message size
  is only looked up when a limit is configured, and a size that cannot be
  determined is treated as "not oversized" so the mail is indexed normally.

fts-parser.h
  struct fts_parser_context gains the oversized_tika flag.

fts-parser-tika.c
  Accepts a userinfo part in the Tika URL and passes it on with
  http_client_request_set_auth_simple(); declines mails flagged as
  oversized; creates its HTTP client with http_client_init_private().

fts-solr/solr-connection.c
  Creates its HTTP client with http_client_init_private().

diff -ur dovecot-2.3.11.3-orig/src/plugins/fts/fts-build-mail.c dovecot-2.3.11.3/src/plugins/fts/fts-build-mail.c
--- dovecot-2.3.11.3-orig/src/plugins/fts/fts-build-mail.c	2020-08-12 14:20:41.000000000 +0200
+++ dovecot-2.3.11.3/src/plugins/fts/fts-build-mail.c	2020-12-07 14:05:23.654217555 +0100
@@ -17,6 +17,7 @@
 #include "fts-filter.h"
 #include "fts-api-private.h"
 #include "fts-build-mail.h"
+#include "settings-parser.h"
 
 /* there are other characters as well, but this doesn't have to be exact */
 #define IS_WORD_WHITESPACE(c) \
@@ -34,6 +35,7 @@
 
 	buffer_t *word_buf, *pending_input;
 	struct fts_user_language *cur_user_lang;
+	bool oversized_tika;
 };
 
 static int fts_build_data(struct fts_mail_build_context *ctx,
@@ -237,6 +239,6 @@
 	parser_context.user = mail_storage_get_user(storage);
 	parser_context.content_disposition = ctx->content_disposition;
-
+	parser_context.oversized_tika = ctx->oversized_tika;
 	if (fts_parser_init(&parser_context, &ctx->body_parser)) {
 		/* extract text using the the returned parser */
 		*binary_body_r = TRUE;
@@ -488,7 +490,34 @@
 	bool binary_body;
 	const char *error;
 	int ret;
-
+	uoff_t msg_size = 0;
+	uoff_t fts_max_size = 0;
+	uoff_t fts_max_size_tika = 0;
+	const char *fts_max_size_setting;
+	const char *fts_max_size_tika_setting;
+	bool oversized_msg;
+	bool oversized_tika;
+
+	/* Optional per-user size limits from the plugin settings. A limit of
+	   0 (unset or unparsable value) disables the corresponding check. */
+	fts_max_size_setting = mail_user_plugin_getenv(update_ctx->backend->ns->user, "fts_max_size");
+	if (fts_max_size_setting != NULL) {
+		i_debug("fts_max_size %s", fts_max_size_setting);
+		if (settings_get_size(fts_max_size_setting, &fts_max_size, &error) < 0) {
+			i_error("%s", error);
+			fts_max_size = 0;
+		}
+		i_debug("fts_max_size (value) %"PRIuUOFF_T, fts_max_size);
+	}
+	fts_max_size_tika_setting = mail_user_plugin_getenv(update_ctx->backend->ns->user, "fts_max_size_tika");
+	if (fts_max_size_tika_setting != NULL) {
+		i_debug("fts_max_size_tika %s", fts_max_size_tika_setting);
+		if (settings_get_size(fts_max_size_tika_setting, &fts_max_size_tika, &error) < 0) {
+			i_error("%s", error);
+			fts_max_size_tika = 0;
+		}
+		i_debug("fts_max_size_tika (value) %"PRIuUOFF_T, fts_max_size_tika);
+	}
 	*may_need_retry_r = FALSE;
 	if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) {
 		if (mail->expunged)
@@ -498,10 +527,26 @@
 			mailbox_get_last_internal_error(mail->box, NULL));
 		return -1;
 	}
-
+	oversized_msg = FALSE;
+	oversized_tika = FALSE;
+	/* Only query the size when a limit is configured. msg_size is only
+	   meaningful when i_stream_get_size() reports success (> 0); in any
+	   other case fall back to 0 so the mail is never seen as oversized. */
+	if ((fts_max_size > 0 || fts_max_size_tika > 0) &&
+	    i_stream_get_size(input, TRUE, &msg_size) <= 0)
+		msg_size = 0;
+	if (fts_max_size > 0 && msg_size > fts_max_size) {
+		i_info("Skipping message body indexing because size %"PRIuUOFF_T" exceeds setting fts_max_size %s", msg_size, fts_max_size_setting);
+		oversized_msg = TRUE;
+	}
+	if (fts_max_size_tika > 0 && msg_size > fts_max_size_tika) {
+		i_info("Skipping message attachment indexing because size %"PRIuUOFF_T" exceeds setting fts_max_size_tika %s", msg_size, fts_max_size_tika_setting);
+		oversized_tika = TRUE;
+	}
 	i_zero(&ctx);
 	ctx.update_ctx = update_ctx;
 	ctx.mail = mail;
+	ctx.oversized_tika = oversized_tika;
 	if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0)
 		ctx.pending_input = buffer_create_dynamic(default_pool, 128);
 
@@ -556,7 +601,7 @@
 			message_decoder_set_return_binary(decoder, TRUE);
 			body_part = TRUE;
 		} else {
-			if (skip_body)
+			if (skip_body || oversized_msg)
 				continue;
 		}
 
@@ -590,7 +635,7 @@
 		else
 			(void)fts_parser_deinit(&ctx.body_parser, NULL);
 	}
-	if (ret == 0 && body_part && !skip_body && !body_added) {
+	if (ret == 0 && body_part && !skip_body && !oversized_msg && !body_added) {
 		/* make sure body is added even when it doesn't exist */
 		block.data = NULL; block.size = 0;
 		ret = fts_build_body_block(&ctx, &block, TRUE);
diff -ur dovecot-2.3.11.3-orig/src/plugins/fts/fts-parser.h dovecot-2.3.11.3/src/plugins/fts/fts-parser.h
--- dovecot-2.3.11.3-orig/src/plugins/fts/fts-parser.h	2020-08-12 14:20:41.000000000 +0200
+++ dovecot-2.3.11.3/src/plugins/fts/fts-parser.h	2020-12-07 12:42:55.653635916 +0100
@@ -10,6 +10,8 @@
 	/* Can't be NULL */
 	const char *content_type;
 	const char *content_disposition;
+	/* TRUE if the mail exceeds fts_max_size_tika; Tika parsing is skipped */
+	bool oversized_tika;
 };
 
 struct fts_parser_vfuncs {
diff -ur dovecot-2.3.11.3-orig/src/plugins/fts/fts-parser-tika.c dovecot-2.3.11.3/src/plugins/fts/fts-parser-tika.c
--- dovecot-2.3.11.3-orig/src/plugins/fts/fts-parser-tika.c	2020-08-12 14:20:41.000000000 +0200
+++ dovecot-2.3.11.3/src/plugins/fts/fts-parser-tika.c	2020-12-07 13:01:33.732476038 +0100
@@ -57,7 +57,7 @@
 	tuser = p_new(user->pool, struct fts_parser_tika_user, 1);
 	MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser);
 
-	if (http_url_parse(url, NULL, 0, user->pool,
+	if (http_url_parse(url, NULL, HTTP_URL_ALLOW_USERINFO_PART, user->pool,
 			   &tuser->http_url, &error) < 0) {
 		i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error);
 		return -1;
@@ -77,7 +77,7 @@
 		http_set.request_timeout_msecs = 60*1000;
 		http_set.ssl = &ssl_set;
 		http_set.debug = user->mail_debug;
-		tika_http_client = http_client_init(&http_set);
+		tika_http_client = http_client_init_private(&http_set);
 	}
 	*http_url_r = tuser->http_url;
 	return 0;
@@ -141,6 +141,10 @@
 
 	if (tika_get_http_client_url(parser_context->user, &http_url) < 0)
 		return NULL;
+	if (parser_context->oversized_tika) {
+		i_info("skipping tika parser due to oversized message");
+		return NULL;
+	}
 	if (http_url->path == NULL)
 		http_url->path = "/";
 
@@ -152,6 +156,11 @@
 			http_url->host.name,
 			t_strconcat(http_url->path, http_url->enc_query, NULL),
 			fts_tika_parser_response, parser);
+	if (http_url->user != NULL) {
+		http_client_request_set_auth_simple(
+			http_req, http_url->user, http_url->password);
+	}
+
 	http_client_request_set_port(http_req, http_url->port);
 	http_client_request_set_ssl(http_req, http_url->have_ssl);
 	if (parser_context->content_type != NULL)
diff -ur dovecot-2.3.11.3-orig/src/plugins/fts-solr/solr-connection.c dovecot-2.3.11.3/src/plugins/fts-solr/solr-connection.c
--- dovecot-2.3.11.3-orig/src/plugins/fts-solr/solr-connection.c	2020-08-12 14:20:41.000000000 +0200
+++ dovecot-2.3.11.3/src/plugins/fts-solr/solr-connection.c	2020-11-15 18:34:13.657576104 +0100
@@ -103,7 +103,7 @@
 		http_set.ssl = ssl_client_set;
 		http_set.debug = solr_set->debug;
 		http_set.rawlog_dir = solr_set->rawlog_dir;
-		solr_http_client = http_client_init(&http_set);
+		solr_http_client = http_client_init_private(&http_set);
 	}
 
 	*conn_r = conn;