[dovecot-cvs] dovecot/src/plugins/fts-squat squat-trie.c,1.8,1.9
tss at dovecot.org
tss at dovecot.org
Wed Dec 13 13:08:30 UTC 2006
Update of /var/lib/cvs/dovecot/src/plugins/fts-squat
In directory talvi:/tmp/cvs-serv28013
Modified Files:
squat-trie.c
Log Message:
Optimization.
Index: squat-trie.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts-squat/squat-trie.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- squat-trie.c 13 Dec 2006 12:31:41 -0000 1.8
+++ squat-trie.c 13 Dec 2006 13:08:28 -0000 1.9
@@ -19,6 +19,11 @@
#include <fcntl.h>
#include <ctype.h>
+/* normalization changes 0..32 -> 0 */
+#define MAX_8BIT_CHAR_COUNT (256 - 32)
+
+#define FAST_8BIT_LEVEL 2
+
#define TRIE_COMPRESS_PERCENTAGE 30
#define TRIE_COMPRESS_MIN_SIZE (1024*50)
@@ -157,6 +162,7 @@
ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count)))
static void free_node(struct trie_node *node, unsigned int level);
+static void squat_trie_compress_chars8(struct trie_node *node);
static int
squat_trie_compress_node(struct squat_trie_compress_context *ctx,
struct trie_node *node, unsigned int level);
@@ -333,6 +339,25 @@
return 0;
}
+static void
+trie_map_fix_fast_node(struct trie_node *node, unsigned int chars8_count)
+{
+ uint8_t *chars = NODE_CHARS8(node);
+ struct trie_node **children = NODE_CHILDREN8(node);
+ int i, j;
+
+ i_assert(node->chars_8bit_count == MAX_8BIT_CHAR_COUNT);
+
+ j = chars8_count - 1;
+ for (i = node->chars_8bit_count - 1; i >= 0; i--) {
+ if (j >= 0 && i == chars[j])
+ children[i] = children[j--];
+ else
+ children[i] = NULL;
+ chars[i] = i;
+ }
+}
+
static int
trie_map_node(struct squat_trie *trie, uint32_t offset, unsigned int level,
struct trie_node **node_r)
@@ -342,7 +367,7 @@
uint32_t num, chars8_count, chars16_count;
unsigned int chars8_offset, chars8_size, chars8_memsize;
unsigned int chars16_offset, chars16_size, chars16_memsize;
- unsigned int idx_size;
+ unsigned int idx_size, alloced_chars8_count;
i_assert(trie->fd != -1);
@@ -373,8 +398,10 @@
idx_size = level == BLOCK_SIZE ?
sizeof(uint32_t) : sizeof(struct trie_node *);
- chars8_memsize = ALIGN(chars8_count * sizeof(uint8_t)) +
- chars8_count * idx_size;
+ alloced_chars8_count = level <= FAST_8BIT_LEVEL ?
+ MAX_8BIT_CHAR_COUNT : chars8_count;
+ chars8_memsize = ALIGN(alloced_chars8_count * sizeof(uint8_t)) +
+ alloced_chars8_count * idx_size;
if (trie_map_area(trie, chars8_offset, chars8_size + 8) < 0)
return -1;
@@ -412,7 +439,7 @@
}
node = i_malloc(sizeof(*node) + chars8_memsize + chars16_memsize);
- node->chars_8bit_count = chars8_count;
+ node->chars_8bit_count = alloced_chars8_count;
node->chars_16bit_count = chars16_count;
node->file_offset = offset;
@@ -433,8 +460,11 @@
src_idx = CONST_PTR_OFFSET(chars8_src, chars8_count);
trie_map_node_save_children(level, src_idx, chars8_count,
children8);
+
+ if (alloced_chars8_count != chars8_count)
+ trie_map_fix_fast_node(node, chars8_count);
if (chars16_count == 0)
- end_offset = &src_idx[chars8_count];
+ end_offset = &src_idx[alloced_chars8_count];
else {
src_idx = CONST_PTR_OFFSET(chars16_src,
chars16_count *
@@ -460,7 +490,7 @@
for (i = 0; i < count; i++) {
child_idx = POINTER_CAST_TO(children[i], size_t);
- if ((child_idx & 1) == 0)
+ if ((child_idx & 1) == 0 && children[i] != NULL)
free_node(children[i], level);
}
}
@@ -620,8 +650,11 @@
trie->hdr = hdr;
trie->file_cache_modify_counter = trie->hdr->modify_counter;
- if (trie_map_node(trie, trie->hdr->root_offset, 1, &trie->root) < 0)
- return 0;
+ if (trie->hdr->root_offset != 0) {
+ if (trie_map_node(trie, trie->hdr->root_offset,
+ 1, &trie->root) < 0)
+ return 0;
+ }
return 1;
}
@@ -860,12 +893,34 @@
node_alloc(uint16_t chr, unsigned int level)
{
struct trie_node *node;
- unsigned int idx_size, idx_offset = sizeof(*node);
+ unsigned int i, idx_size, idx_offset = sizeof(*node);
idx_size = level < BLOCK_SIZE ?
sizeof(struct trie_node *) : sizeof(uint32_t);
- if (chr < 256) {
+ if (level <= FAST_8BIT_LEVEL) {
+ uint8_t *chars;
+ unsigned int chars16_count = chr >= 256 ? 1 : 0;
+
+ node = i_malloc(sizeof(*node) +
+ ALIGN(MAX_8BIT_CHAR_COUNT) +
+ ALIGN(sizeof(uint16_t) * chars16_count) +
+ (MAX_8BIT_CHAR_COUNT + chars16_count) *
+ idx_size);
+ node->chars_8bit_count = MAX_8BIT_CHAR_COUNT;
+
+ chars = NODE_CHARS8(node);
+ for (i = 0; i < MAX_8BIT_CHAR_COUNT; i++)
+ chars[i] = i;
+
+ if (chars16_count > 0) {
+ uint16_t *chrp;
+
+ node->chars_16bit_count = chars16_count;
+ chrp = (uint16_t *)&chars[i];
+ *chrp = chr;
+ }
+ } else if (chr < 256) {
uint8_t *chrp;
idx_offset += ALIGN(sizeof(*chrp));
@@ -982,11 +1037,14 @@
if (*data < 256) {
unsigned int count;
+ i_assert(*data < MAX_8BIT_CHAR_COUNT);
if (node == NULL) {
ctx->node_count++;
node = *parent = node_alloc(*data, level);
- char_idx = 0;
+ char_idx = level <= FAST_8BIT_LEVEL ? *data : 0;
modified = TRUE;
+ } else if (level <= FAST_8BIT_LEVEL) {
+ char_idx = *data;
} else {
uint8_t *chars = NODE_CHARS8(node);
uint8_t *pos;
@@ -1071,26 +1129,26 @@
struct trie_node **children;
uint32_t char_idx;
- if (*data < 256) {
- const uint8_t *chars, *pos;
-
- if (node == NULL)
- return 0;
+ if (node == NULL)
+ return 0;
- chars = NODE_CHARS8(node);
- pos = bsearch(data, chars, node->chars_8bit_count,
- sizeof(chars[0]), chr_8bit_cmp);
- if (pos == NULL || *pos != *data)
- return 0;
+ if (*data < 256) {
+ if (level <= FAST_8BIT_LEVEL)
+ char_idx = *data;
+ else {
+ const uint8_t *chars, *pos;
+ chars = NODE_CHARS8(node);
+ pos = bsearch(data, chars, node->chars_8bit_count,
+ sizeof(chars[0]), chr_8bit_cmp);
+ if (pos == NULL || *pos != *data)
+ return 0;
- char_idx = pos - chars;
+ char_idx = pos - chars;
+ }
children = NODE_CHILDREN8(node);
} else {
const uint16_t *chars, *pos;
- if (node == NULL)
- return 0;
-
chars = NODE_CHARS16(node, level);
pos = bsearch(data, chars, node->chars_16bit_count,
sizeof(chars[0]), chr_16bit_cmp);
@@ -1245,6 +1303,9 @@
uint32_t idx;
for (i = 0; i < count; i++) {
+ if (children[i] == NULL)
+ continue;
+
child_idx = POINTER_CAST_TO(children[i], size_t);
if ((child_idx & 1) != 0)
idx = child_idx & ~1;
@@ -1324,7 +1385,7 @@
for (i = 0; i < count; i++) {
child_idx = POINTER_CAST_TO(children[i], size_t);
- if ((child_idx & 1) == 0) {
+ if ((child_idx & 1) == 0 && children[i] != NULL) {
if (trie_write_node(ctx, level, children[i]) < 0)
return -1;
}
@@ -1355,9 +1416,11 @@
if (!node->modified)
return 0;
- if (level < BLOCK_SIZE)
+ if (level < BLOCK_SIZE) {
+ if (level <= FAST_8BIT_LEVEL)
+ squat_trie_compress_chars8(node);
node_pack(trie->buf, node);
- else {
+ } else {
if (node_leaf_finish(trie, node) < 0)
return -1;
@@ -1687,6 +1750,7 @@
node->file_offset = 0;
return 0;
}
+
node_pack(trie->buf, node);
}
More information about the dovecot-cvs
mailing list