dovecot-2.0-pigeonhole: Restructured and optimized lexical scanner.

pigeonhole at rename-it.nl pigeonhole at rename-it.nl
Sun Jan 31 13:39:12 EET 2010


details:   http://hg.rename-it.nl/dovecot-2.0-pigeonhole/rev/9cadd8aa01c8
changeset: 1231:9cadd8aa01c8
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Sun Jan 31 12:37:46 2010 +0100
description:
Restructured and optimized lexical scanner.

diffstat:

 src/lib-sieve/sieve-lexer.c  |  368 +++++++++++++++++++++-------------------------
 src/lib-sieve/sieve-lexer.h  |   93 +++++++++-
 src/lib-sieve/sieve-parser.c |   72 ++++----
 3 files changed, 282 insertions(+), 251 deletions(-)

diffs (truncated from 1159 to 300 lines):

diff -r a3d39b596c90 -r 9cadd8aa01c8 src/lib-sieve/sieve-lexer.c
--- a/src/lib-sieve/sieve-lexer.c	Thu Jan 28 22:15:00 2010 +0100
+++ b/src/lib-sieve/sieve-lexer.c	Sun Jan 31 12:37:46 2010 +0100
@@ -33,26 +33,20 @@
  */
  
 inline static void sieve_lexer_error
-	(struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
+	(const struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
 inline static void sieve_lexer_warning
-	(struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
+	(const struct sieve_lexer *lexer, const char *fmt, ...) ATTR_FORMAT(2, 3);
 
 /*
  * Lexer object
  */
 
-struct sieve_lexer {
+struct sieve_lexical_scanner {
 	pool_t pool;
 	struct sieve_instance *svinst;
 
 	struct sieve_script *script;
-	struct istream *input;
-		
-	int current_line;
-	
-	enum sieve_token_type token_type;
-	string_t *token_str_value;
-	int token_int_value;
+	struct istream *input;	
 	
 	struct sieve_error_handler *ehandler;
 	
@@ -60,13 +54,15 @@
 	const unsigned char *buffer;
 	size_t buffer_size;
 	size_t buffer_pos;
+
+	struct sieve_lexer lexer;
 };
 
-struct sieve_lexer *sieve_lexer_create
+const struct sieve_lexer *sieve_lexer_create
 (struct sieve_script *script, struct sieve_error_handler *ehandler) 
 {
 	pool_t pool;
-	struct sieve_lexer *lexer;
+	struct sieve_lexical_scanner *scanner;
 	struct sieve_instance *svinst = sieve_script_svinst(script);
 	struct istream *stream;
 	const struct stat *st;
@@ -86,41 +82,44 @@
 		return NULL;
 	}
 	
-	pool = pool_alloconly_create("sieve_lexer", 1024);	
-	lexer = p_new(pool, struct sieve_lexer, 1);
-	lexer->pool = pool;
+	pool = pool_alloconly_create("sieve_lexer_scanner", 1024);	
+	scanner = p_new(pool, struct sieve_lexical_scanner, 1);
+	scanner->pool = pool;
+	scanner->lexer.scanner = scanner;
 	
-	lexer->ehandler = ehandler;
+	scanner->ehandler = ehandler;
 	sieve_error_handler_ref(ehandler);
 
-	lexer->input = stream;
-	i_stream_ref(lexer->input);
+	scanner->input = stream;
+	i_stream_ref(scanner->input);
 	
-	lexer->script = script;
+	scanner->script = script;
 	sieve_script_ref(script);
 	
-	lexer->buffer = NULL;
-	lexer->buffer_size = 0;
-	lexer->buffer_pos = 0;
+	scanner->buffer = NULL;
+	scanner->buffer_size = 0;
+	scanner->buffer_pos = 0;
 	
-	lexer->current_line = 1;	
-	lexer->token_type = STT_NONE;
-	lexer->token_str_value = str_new(pool, 256);
-	lexer->token_int_value = 0;
+	scanner->lexer.current_line = 1;	
+	scanner->lexer.token_type = STT_NONE;
+	scanner->lexer.token_str_value = str_new(pool, 256);
+	scanner->lexer.token_int_value = 0;
 		
-	return lexer;
+	return &scanner->lexer;
 }
 
-void sieve_lexer_free(struct sieve_lexer **lexer) 
+void sieve_lexer_free(const struct sieve_lexer **lexer) 
 {	
-	i_stream_unref(&(*lexer)->input);
+	struct sieve_lexical_scanner *scanner = (*lexer)->scanner;
 
-	sieve_script_close((*lexer)->script);
-	sieve_script_unref(&(*lexer)->script);
+	i_stream_unref(&scanner->input);
 
-	sieve_error_handler_unref(&(*lexer)->ehandler);
+	sieve_script_close(scanner->script);
+	sieve_script_unref(&scanner->script);
 
-	pool_unref(&(*lexer)->pool); 
+	sieve_error_handler_unref(&scanner->ehandler);
+
+	pool_unref(&scanner->pool); 
 
 	*lexer = NULL;
 }
@@ -130,14 +129,16 @@
  */
 
 inline static void sieve_lexer_error
-(struct sieve_lexer *lexer, const char *fmt, ...)
+(const struct sieve_lexer *lexer, const char *fmt, ...)
 {
+	struct sieve_lexical_scanner *scanner = lexer->scanner;
+
 	va_list args;
 	va_start(args, fmt);
 
 	T_BEGIN {
-		sieve_verror(lexer->ehandler, 
-			sieve_error_script_location(lexer->script, lexer->current_line),
+		sieve_verror(scanner->ehandler, 
+			sieve_error_script_location(scanner->script, lexer->current_line),
 			fmt, args);
 	} T_END;
 		
@@ -145,21 +146,24 @@
 }
 
 inline static void sieve_lexer_warning
-(struct sieve_lexer *lexer, const char *fmt, ...)
+(const struct sieve_lexer *lexer, const char *fmt, ...)
 {
+	struct sieve_lexical_scanner *scanner = lexer->scanner;
+
 	va_list args;
 	va_start(args, fmt);
 
 	T_BEGIN { 
-		sieve_vwarning(lexer->ehandler, 
-			sieve_error_script_location(lexer->script, lexer->current_line),
+		sieve_vwarning(scanner->ehandler, 
+			sieve_error_script_location(scanner->script, lexer->current_line),
 			fmt, args);
 	} T_END;
 		
 	va_end(args);
 }
 
-const char *sieve_lexer_token_string(struct sieve_lexer *lexer) 
+const char *sieve_lexer_token_description
+(const struct sieve_lexer *lexer) 
 {
 	switch ( lexer->token_type ) {
 		case STT_NONE: return "no token (bug)"; 		
@@ -194,7 +198,7 @@
  * Debug 
  */
  
-void sieve_lexer_print_token(struct sieve_lexer *lexer) 
+void sieve_lexer_token_print(const struct sieve_lexer *lexer) 
 {
 	switch ( lexer->token_type ) {
 		case STT_NONE: printf("??NONE?? "); break;		
@@ -227,77 +231,37 @@
 }
 
 /*
- * Token access
- */ 
-
-enum sieve_token_type sieve_lexer_current_token(struct sieve_lexer *lexer) 
-{
-	return lexer->token_type;
-}
-
-const string_t *sieve_lexer_token_str(struct sieve_lexer *lexer) 
-{
-	i_assert(	lexer->token_type == STT_STRING );
-		
-	return lexer->token_str_value;
-}
-
-const char *sieve_lexer_token_ident(struct sieve_lexer *lexer) 
-{
-	i_assert(
-		lexer->token_type == STT_TAG ||
-		lexer->token_type == STT_IDENTIFIER);
-		
-	return str_c(lexer->token_str_value);
-}
-
-int sieve_lexer_token_int(struct sieve_lexer *lexer) 
-{
-	i_assert(lexer->token_type == STT_NUMBER);
-		
-	return lexer->token_int_value;
-}
-
-bool sieve_lexer_eof(struct sieve_lexer *lexer) 
-{
-	return lexer->token_type == STT_EOF;
-}
-
-int sieve_lexer_current_line(struct sieve_lexer *lexer) 
-{
-	return lexer->current_line;
-}
-
-/*
  * Lexical scanning 
  */
 
-static void sieve_lexer_shift(struct sieve_lexer *lexer) 
+static void sieve_lexer_shift(struct sieve_lexical_scanner *scanner) 
 {
-	if ( lexer->buffer != NULL && lexer->buffer[lexer->buffer_pos] == '\n' ) 
-		lexer->current_line++;	
+	if ( scanner->buffer != NULL && scanner->buffer[scanner->buffer_pos] == '\n' ) 
+		scanner->lexer.current_line++;	
 	
-	if ( lexer->buffer != NULL && lexer->buffer_pos + 1 < lexer->buffer_size )
-		lexer->buffer_pos++;
+	if ( scanner->buffer != NULL && 
+		scanner->buffer_pos + 1 < scanner->buffer_size )
+		scanner->buffer_pos++;
 	else {
-		if ( lexer->buffer != NULL )
-			i_stream_skip(lexer->input, lexer->buffer_size);
+		if ( scanner->buffer != NULL )
+			i_stream_skip(scanner->input, scanner->buffer_size);
 		
-		lexer->buffer = i_stream_get_data(lexer->input, &lexer->buffer_size);
+		scanner->buffer = i_stream_get_data(scanner->input, &scanner->buffer_size);
 	  
-		if ( lexer->buffer == NULL && i_stream_read(lexer->input) > 0 )
-	  		lexer->buffer = i_stream_get_data(lexer->input, &lexer->buffer_size);
+		if ( scanner->buffer == NULL && i_stream_read(scanner->input) > 0 )
+	  		scanner->buffer = i_stream_get_data
+					(scanner->input, &scanner->buffer_size);
 	  	
-		lexer->buffer_pos = 0;
+		scanner->buffer_pos = 0;
 	}
 }
 
-static inline int sieve_lexer_curchar(struct sieve_lexer *lexer) 
+static inline int sieve_lexer_curchar(struct sieve_lexical_scanner *scanner) 
 {	
-	if ( lexer->buffer == NULL )
+	if ( scanner->buffer == NULL )
 		return -1;
 	
-	return lexer->buffer[lexer->buffer_pos];
+	return scanner->buffer[scanner->buffer_pos];
 }
 
 static inline const char *_char_sanitize(int ch)
@@ -311,32 +275,35 @@
 /* sieve_lexer_scan_raw_token:
  *   Scans valid tokens and whitespace 
  */
-static bool sieve_lexer_scan_raw_token(struct sieve_lexer *lexer) 
+static bool sieve_lexer_scan_raw_token(struct sieve_lexical_scanner *scanner) 
 {
+	struct sieve_lexer *lexer = &scanner->lexer;
 	sieve_number_t start_line;
 	string_t *str;
 
 	/* Read first character */
 	if ( lexer->token_type == STT_NONE ) {
-		i_stream_read(lexer->input);
-		sieve_lexer_shift(lexer);
+		i_stream_read(scanner->input);
+		sieve_lexer_shift(scanner);
 	}
   
-	switch ( sieve_lexer_curchar(lexer) ) {
+	switch ( sieve_lexer_curchar(scanner) ) {
 	
 	/* whitespace */
 	
 	// hash-comment = ( "#" *CHAR-NOT-CRLF CRLF )
 	case '#': 
-		sieve_lexer_shift(lexer);
-		while ( sieve_lexer_curchar(lexer) != '\n' ) {
-			switch( sieve_lexer_curchar(lexer) ) {
+		sieve_lexer_shift(scanner);


More information about the dovecot-cvs mailing list