dovecot-2.2: lib-fts: Fixed using max_length setting in simple tokenizer

dovecot at dovecot.org dovecot at dovecot.org
Sat May 9 08:32:35 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/2048dade16e7
changeset: 18559:2048dade16e7
user:      Teemu Huovila <teemu.huovila at dovecot.fi>
date:      Sat May 09 11:16:22 2015 +0300
description:
lib-fts: Fixed using max_length setting in simple tokenizer

diffstat:

 src/lib-fts/fts-tokenizer-generic.c |  17 +++++++++--------
 src/lib-fts/test-fts-tokenizer.c    |   9 +++++++--
 2 files changed, 16 insertions(+), 10 deletions(-)

diffs (80 lines):

diff -r 8d445959df03 -r 2048dade16e7 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 11:15:50 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 11:16:22 2015 +0300
@@ -86,7 +86,7 @@
 fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
                                            const char **token_r)
 {
-	*token_r = t_strndup(tok->token->data, tok->token->used);
+	*token_r = t_strndup(tok->token->data, I_MIN(tok->token->used, tok->max_length));
 	buffer_set_used_size(tok->token, 0);
 	return 1;
 }
@@ -147,8 +147,7 @@
 		char_start_i = i;
 		if (data_is_word_boundary(data, size, &i)) {
 			len = char_start_i - start;
-			buffer_append(tok->token, data + start,
-			              I_MIN(len, tok->max_length));
+			buffer_append(tok->token, data + start, len);
 			if (tok->token->used == 0) {
 				/* no text read yet */
 				start = i + 1;
@@ -161,14 +160,16 @@
 	}
 	/* word boundary not found yet */
 	len = i - start;
-	buffer_append(tok->token, data + start, I_MIN(len, tok->max_length));
-
+	buffer_append(tok->token, data + start, len);
 	*skip_r = i;
 
-	if (size == 0 && tok->token->used > 0) {
-		/* return the last token */
+	/* return the last token */
+	if (size == 0 && tok->token->used > 0)
 		return fts_tokenizer_generic_simple_current_token(tok, token_r);
-	}
+
+	/* token too long */
+	if (tok->token->used > tok->max_length)
+		return fts_tokenizer_generic_simple_current_token(tok, token_r);
 	return 0;
 }
 
diff -r 8d445959df03 -r 2048dade16e7 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 11:15:50 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 11:16:22 2015 +0300
@@ -6,6 +6,8 @@
 #include "test-common.h"
 #include "fts-tokenizer.h"
 #include "fts-tokenizer-private.h"
+/* TODO: fix including and linking of this. */
+/* #include "fts-tokenizer-generic-private.h" */
 
 #include <stdlib.h>
 
@@ -13,11 +15,12 @@
 {
 	static const unsigned char input[] =
 		"hello world\r\nAnd there\twas: text "
-		"galore, and more.\n\n (\"Hello world\")last ";
+		"galore, and longlonglongabcdefghijklmnopqrstuvwxyz more.\n\n (\"Hello world\")last ";
 	static const char *const expected_output[] = {
 		"hello", "world", "And",
 		"there", "was", "text", "galore",
-		"and", "more", "Hello", "world", "last", NULL
+		"and", "longlonglongabcdefghijklmnopqr",
+		"more", "Hello", "world", "last", NULL
 	};
 	const struct fts_tokenizer *tok_class;
 	struct fts_tokenizer *tok;
@@ -28,6 +31,8 @@
 	fts_tokenizers_init();
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, NULL, &tok, &error) == 0);
+/*TODO: Uncomment when fts-tokenizer-generic-private.h inclusion is fixed */
+/*test_assert(((struct generic_fts_tokenizer *) tok)->algorithm ==  BOUNDARY_ALGORITHM_SIMPLE);*/
 	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;


More information about the dovecot-cvs mailing list