dovecot-2.2: lib-fts: Minor fts-tokenizer-address cleanups

dovecot at dovecot.org dovecot at dovecot.org
Sat May 9 09:52:49 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/ac6ae1b9f5c4
changeset: 18570:ac6ae1b9f5c4
user:      Timo Sirainen <tss at iki.fi>
date:      Sat May 09 12:50:11 2015 +0300
description:
lib-fts: Minor fts-tokenizer-address cleanups

diffstat:

 src/lib-fts/fts-tokenizer-address.c |  28 ++++++++--------------------
 1 files changed, 8 insertions(+), 20 deletions(-)

diffs (88 lines):

diff -r 7c1fe66e8855 -r ac6ae1b9f5c4 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:48:13 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:50:11 2015 +0300
@@ -23,8 +23,7 @@
 	struct fts_tokenizer tokenizer;
 	enum email_address_parser_state state;
 	string_t *last_word;
-	string_t *parent_data; /* Copy of input data between tokens.
-	                          TODO: could be buffer_t maybe */
+	string_t *parent_data; /* Copy of input data between tokens. */
 	bool search;
 };
 
@@ -94,34 +93,27 @@
  Returns size that can be skipped. */
 static size_t skip_nonlocal_part(const unsigned char *data, size_t size)
 {
-	const unsigned char *p = data;
 	size_t skip = 0;
 
 	/* Yes, a dot can start an address. De facto before de jure. */
-	while ( skip < size && (!IS_ATEXT(*p) && *p != '.')) {
+	while (skip < size && (!IS_ATEXT(data[skip]) && data[skip] != '.'))
 		skip++;
-		p++;
-	}
 	return skip;
 }
 
-/* TODO: 
-   - DONT dereference *p past size!
-*/
 static enum email_address_parser_state
 fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok,
                                         const unsigned char *data, size_t size,
                                         size_t *skip_r)
 {
 	size_t pos = 0;
-	const unsigned char *p = data;
 	bool at = FALSE;
 
-	while (pos < size && (IS_ATEXT(*p) || (*p == '@' || *p == '.'))) {
-		if (*p == '@')
+	while (pos < size && (IS_ATEXT(data[pos]) ||
+			      data[pos] == '@' || data[pos] == '.')) {
+		if (data[pos] == '@')
 			at = TRUE;
 		pos++;
-		p++;
 		if (at)
 			break;
 	}
@@ -133,13 +125,13 @@
 	}
 
 	/* localpart, @ not included yet */
-	if (pos > 0 && (IS_ATEXT(*(p-1)) || *(p-1) == '.')) {
+	if (pos > 0 && (IS_ATEXT(data[pos-1]) || data[pos-1] == '.')) {
 		str_append_n(tok->last_word, data, pos);
 		*skip_r = pos;
 		return  EMAIL_ADDRESS_PARSER_STATE_LOCALPART;
 	}
 	/* not a localpart. skip past rest of no-good chars. */
-	pos += skip_nonlocal_part(p, size - pos);
+	pos += skip_nonlocal_part(data+pos, size - pos);
 	*skip_r = pos;
 	return EMAIL_ADDRESS_PARSER_STATE_NONE;
 }
@@ -153,11 +145,6 @@
 	return p[1] == '\0';
 }
 
-/* TODO:
- - allow address literals
- - reject "@..."
- - reject "@.host.tld"
-*/
 static enum email_address_parser_state
 fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok,
                                          const unsigned char *data, size_t size,
@@ -196,6 +183,7 @@
 	if (tok->tokenizer.parent != NULL)
 		str_append_n(tok->parent_data, data, size);
 }
+
 static int
 fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                                  const unsigned char *data, size_t size,


More information about the dovecot-cvs mailing list