[dovecot-cvs] dovecot/src/lib Makefile.am, 1.72, 1.73 str-find.c, NONE, 1.1 str-find.h, NONE, 1.1

tss at dovecot.org tss at dovecot.org
Wed Apr 4 11:17:05 EEST 2007


Update of /var/lib/cvs/dovecot/src/lib
In directory talvi:/tmp/cvs-serv22559

Modified Files:
	Makefile.am 
Added Files:
	str-find.c str-find.h 
Log Message:
Added Boyer-Moore string searching.



Index: Makefile.am
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib/Makefile.am,v
retrieving revision 1.72
retrieving revision 1.73
diff -u -d -r1.72 -r1.73
--- Makefile.am	29 Mar 2007 11:51:32 -0000	1.72
+++ Makefile.am	4 Apr 2007 08:17:03 -0000	1.73
@@ -78,6 +78,7 @@
 	seq-range-array.c \
 	sha1.c \
 	str.c \
+	str-find.c \
 	str-sanitize.c \
 	strescape.c \
 	strfuncs.c \
@@ -156,6 +157,7 @@
 	seq-range-array.h \
 	sha1.h \
 	str.h \
+	str-find.h \
 	str-sanitize.h \
 	strescape.h \
 	strfuncs.h \

--- NEW FILE: str-find.c ---
/* Copyright (C) 2007 Timo Sirainen */

#include "lib.h"
#include "str-find.h"

struct str_find_context {
	pool_t pool;
	unsigned char *key;
	unsigned int key_len;

	unsigned int *matches;
	unsigned int match_count;

	int badtab[UCHAR_MAX+1];
	int goodtab[];
};

static void init_badtab(struct str_find_context *ctx)
{
	unsigned int i, len_1 = ctx->key_len - 1;

	for (i = 0; i <= UCHAR_MAX; i++)
		ctx->badtab[i] = ctx->key_len;

	for (i = 0; i < len_1; i++)
		ctx->badtab[ctx->key[i]] = len_1 - i;
}

static void init_suffixes(struct str_find_context *ctx, unsigned int *suffixes)
{
	unsigned int len_1 = ctx->key_len - 1;
	int f = 0, g, i;

	suffixes[len_1] = ctx->key_len;
	g = len_1;
	for (i = ctx->key_len - 2; i >= 0; i--) {
		if (i > g && (int)suffixes[i + len_1 - f] < i - g)
			suffixes[i] = suffixes[i + len_1 - f];
		else {
			if (i < g)
				g = i;
			f = i;
			while (g >= 0 && ctx->key[g] == ctx->key[g + len_1 - f])
				g--;
			suffixes[i] = f - g;
		}
	}
}

static void init_goodtab(struct str_find_context *ctx)
{
	unsigned int len_1 = ctx->key_len - 1;
	unsigned int j, *suffixes;
	int i;

	suffixes = t_buffer_get(ctx->key_len);
	init_suffixes(ctx, suffixes);

	for (i = 0; i < (int)ctx->key_len; i++)
		ctx->goodtab[i] = ctx->key_len;

	j = 0;
	for (i = len_1; i >= -1; i--) {
		if (i < 0 || suffixes[i] == (unsigned int)i + 1) {
			for (; j < len_1 - i; j++) {
				if (ctx->goodtab[j] == (int)ctx->key_len)
					ctx->goodtab[j] = len_1 - i;
			}
		}
	}
	for (i = 0; i <= (int)ctx->key_len - 2; i++)
		ctx->goodtab[len_1 - suffixes[i]] = len_1 - i;
}
 
struct str_find_context *str_find_init(pool_t pool, const char *key)
{
	struct str_find_context *ctx;
	unsigned int key_len = strlen(key);

	ctx = p_malloc(pool, sizeof(struct str_find_context) +
		       sizeof(ctx->goodtab[0]) * key_len);
	ctx->pool = pool;
	ctx->matches = p_malloc(pool, key_len);
	ctx->key_len = key_len;
	ctx->key = p_malloc(pool, key_len);
	memcpy(ctx->key, key, key_len);

	init_goodtab(ctx);
	init_badtab(ctx);
	return ctx;
}

void str_find_deinit(struct str_find_context **_ctx)
{
	struct str_find_context *ctx = *_ctx;

	*_ctx = NULL;
	p_free(ctx->pool, ctx->matches);
	p_free(ctx->pool, ctx->key);
	p_free(ctx->pool, ctx);
}

bool str_find_more(struct str_find_context *ctx,
		    const unsigned char *data, size_t size)
{
	unsigned int key_len = ctx->key_len;
	unsigned int i, j, a, b;
	int bad_value;

	for (i = j = 0; i < ctx->match_count; i++) {
		a = ctx->matches[i];
		if (ctx->matches[i] + size >= key_len) {
			/* we can fully determine this match now */
			for (; a < key_len; a++) {
				if (ctx->key[a] != data[a - ctx->matches[i]])
					break;
			}

			if (a == key_len)
				return TRUE;
		} else {
			for (b = 0; b < size; b++) {
				if (ctx->key[a+b] != data[b])
					break;
			}

			if (b == size)
				ctx->matches[j++] = a + size;
		}
	}
	if (j > 0) {
		i_assert(j + size < key_len);
		ctx->match_count = j;
		j = 0;
	} else {
		/* Boyer-Moore searching */
		j = 0;
		while (j + key_len <= size) {
			i = key_len - 1;
			while (ctx->key[i] == data[i + j]) {
				if (i == 0)
					return TRUE;
				i--;
			}

			bad_value = ctx->badtab[data[i + j]] + i + 1 - key_len;
			j += I_MAX(ctx->goodtab[i], bad_value);
		}
		i_assert(j <= size);
		ctx->match_count = 0;
	}

	for (; j < size; j++) {
		for (i = j; i < size; i++) {
			if (ctx->key[i-j] != data[i])
				break;
		}
		if (i == size)
			ctx->matches[ctx->match_count++] = size - j;
	}
	return FALSE;
}

void str_find_reset(struct str_find_context *ctx)
{
	ctx->match_count = 0;
}

--- NEW FILE: str-find.h ---
#ifndef __STR_FIND_H
#define __STR_FIND_H

struct str_find_context;

struct str_find_context *str_find_init(pool_t pool, const char *key);
void str_find_deinit(struct str_find_context **ctx);

/* Returns TRUE if key is found. It's possible to send the data in arbitrary
   blocks and have the key still match. */
bool str_find_more(struct str_find_context *ctx,
		   const unsigned char *data, size_t size);
/* Reset input data. The next str_find_more() call won't try to match the key
   to earlier data. */
void str_find_reset(struct str_find_context *ctx);

#endif



More information about the dovecot-cvs mailing list