dovecot: Simplified and optimized the sorting code.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Dec 8 21:36:25 EET 2007
details: http://hg.dovecot.org/dovecot/rev/09e1e8d4aa53
changeset: 6968:09e1e8d4aa53
user: Timo Sirainen <tss at iki.fi>
date: Sat Dec 08 21:36:20 2007 +0200
description:
Simplified and optimized the sorting code.
diffstat:
1 file changed, 174 insertions(+), 279 deletions(-)
src/lib-storage/index/index-sort.c | 453 +++++++++++++-----------------------
diffs (truncated from 607 to 300 lines):
diff -r 4c3002f3cd51 -r 09e1e8d4aa53 src/lib-storage/index/index-sort.c
--- a/src/lib-storage/index/index-sort.c Sat Dec 08 21:28:46 2007 +0200
+++ b/src/lib-storage/index/index-sort.c Sat Dec 08 21:36:20 2007 +0200
@@ -53,28 +53,21 @@ struct mail_search_sort_program {
const char *primary_sort_header;
struct mail *temp_mail;
- ARRAY_TYPE(mail_sort_node) nodes;
+ ARRAY_TYPE(mail_sort_node) nodes, all_nodes;
const struct mail_sort_node *nodes_ptr;
unsigned int nodes_count, iter_idx;
- ARRAY_TYPE(mail_sort_node) all_nodes;
-
uint32_t ext_id;
- uint32_t prev_seq, last_sorted_seq;
+ unsigned int first_missing_sort_id_idx;
unsigned int reverse:1;
- unsigned int skipped_mails:1;
unsigned int sort_ids_added:1;
+ unsigned int missing_sort_ids:1;
};
struct sort_cmp_context {
struct mail_search_sort_program *program;
struct mail *mail;
-
- uint32_t cache_seq;
- enum mail_sort_type cache_type;
- uint32_t cache_value;
- const char *cache_str;
};
static struct sort_cmp_context static_node_cmp_context;
@@ -168,19 +161,21 @@ static const char *get_first_mailbox(str
return addr != NULL ? addr->mailbox : "";
}
-static const char *
-sort_header_get(enum mail_sort_type sort_type, struct mail *mail, uint32_t seq)
+static void
+sort_header_get(string_t *dest, enum mail_sort_type sort_type,
+ struct mail *mail, uint32_t seq)
{
const char *str;
- string_t *buf;
mail_set_seq(mail, seq);
switch (sort_type & MAIL_SORT_MASK) {
case MAIL_SORT_SUBJECT:
if (mail_get_first_header(mail, "Subject", &str) <= 0)
- return "";
- return imap_get_base_subject_cased(pool_datastack_create(),
- str, NULL);
+ return;
+ str = imap_get_base_subject_cased(pool_datastack_create(),
+ str, NULL);
+ str_append(dest, str);
+ return;
case MAIL_SORT_CC:
str = get_first_mailbox(mail, "Cc");
break;
@@ -194,9 +189,7 @@ sort_header_get(enum mail_sort_type sort
i_unreached();
}
- buf = t_str_new(128);
- (void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, buf);
- return str_c(buf);
+ (void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, dest);
}
static uint32_t sort_get_arrival(struct mail *mail)
@@ -259,23 +252,19 @@ static int sort_node_cmp_type(struct sor
case MAIL_SORT_TO:
case MAIL_SORT_SUBJECT:
T_FRAME_BEGIN {
- const char *str1, *str2;
-
- str1 = n1->seq == ctx->cache_seq &&
- ctx->cache_type == sort_type ? ctx->cache_str :
- sort_header_get(sort_type, ctx->mail, n1->seq);
- str2 = sort_header_get(sort_type, ctx->mail, n2->seq);
-
- ret = strcmp(str1, str2);
+ string_t *str1, *str2;
+
+ str1 = t_str_new(256);
+ str2 = t_str_new(256);
+ sort_header_get(str1, sort_type, ctx->mail, n1->seq);
+ sort_header_get(str2, sort_type, ctx->mail, n2->seq);
+
+ ret = strcmp(str_c(str1), str_c(str2));
} T_FRAME_END;
break;
case MAIL_SORT_ARRIVAL:
- if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
- time1 = ctx->cache_value;
- else {
- mail_set_seq(ctx->mail, n1->seq);
- time1 = sort_get_arrival(ctx->mail);
- }
+ mail_set_seq(ctx->mail, n1->seq);
+ time1 = sort_get_arrival(ctx->mail);
mail_set_seq(ctx->mail, n2->seq);
time2 = sort_get_arrival(ctx->mail);
@@ -284,12 +273,8 @@ static int sort_node_cmp_type(struct sor
(time1 > time2 ? 1 : 0);
break;
case MAIL_SORT_DATE:
- if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
- time1 = ctx->cache_value;
- else {
- mail_set_seq(ctx->mail, n1->seq);
- time1 = sort_get_date(ctx->mail);
- }
+ mail_set_seq(ctx->mail, n1->seq);
+ time1 = sort_get_date(ctx->mail);
mail_set_seq(ctx->mail, n2->seq);
time2 = sort_get_date(ctx->mail);
@@ -298,12 +283,8 @@ static int sort_node_cmp_type(struct sor
(time1 > time2 ? 1 : 0);
break;
case MAIL_SORT_SIZE:
- if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
- size1 = ctx->cache_value;
- else {
- mail_set_seq(ctx->mail, n1->seq);
- size1 = sort_get_size(ctx->mail);
- }
+ mail_set_seq(ctx->mail, n1->seq);
+ size1 = sort_get_size(ctx->mail);
mail_set_seq(ctx->mail, n2->seq);
size2 = sort_get_size(ctx->mail);
@@ -341,31 +322,24 @@ static int sort_node_cmp(const void *p1,
return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2);
}
-static int sort_node_cmp_no_sort_id(const void *p1, const void *p2)
+static int sort_node_cmp_nozero_sort_id(const void *p1, const void *p2)
{
struct sort_cmp_context *ctx = &static_node_cmp_context;
-
- return sort_node_cmp_type(ctx, ctx->program->sort_program, p1, p2);
-}
-
-static void
-index_sort_save_ids(struct mail_search_sort_program *program,
- uint32_t first_seq)
-{
- struct index_transaction_context *t =
- (struct index_transaction_context *)program->t;
- const struct mail_sort_node *nodes;
- unsigned int i, count;
-
- nodes = array_get(&program->all_nodes, &count);
- for (i = 0; i < count; i++) {
- if (nodes[i].seq < first_seq)
- continue;
-
- i_assert(nodes[i].sort_id != 0);
- mail_index_update_ext(t->trans, nodes[i].seq,
- program->ext_id, &nodes[i].sort_id, NULL);
- }
+ const struct mail_sort_node *n1 = p1, *n2 = p2;
+ const enum mail_sort_type *sort_program;
+
+ /* Use sort IDs only if both have them */
+ if (n1->sort_id != 0 && n2->sort_id != 0) {
+ if (n1->sort_id < n2->sort_id)
+ return -1;
+ if (n1->sort_id > n2->sort_id)
+ return 1;
+ sort_program = ctx->program->sort_program + 1;
+ } else {
+ sort_program = ctx->program->sort_program;
+ }
+
+ return sort_node_cmp_type(ctx, ctx->program->sort_program, n1, n2);
}
static bool
@@ -375,19 +349,18 @@ index_sort_add_ids_range(struct mail_sea
{
struct mail_sort_node *nodes;
unsigned int i, count;
- const char *last_str = "";
uint32_t prev_id = 0, last_id = (uint32_t)-1;
- string_t *prev_str;
- const char *str;
+ string_t *last_str, *prev_str, *str;
unsigned int skip;
+ last_str = t_str_new(256);
nodes = array_get_modifiable(&program->all_nodes, &count);
if (nodes[idx2].sort_id != 0) {
i_assert(idx1 != idx2);
last_id = nodes[idx2].sort_id;
- last_str = sort_header_get(program->sort_program[0], mail,
- nodes[idx2].seq);
+ sort_header_get(last_str, program->sort_program[0], mail,
+ nodes[idx2].seq);
idx2--;
}
@@ -395,19 +368,21 @@ index_sort_add_ids_range(struct mail_sea
if (nodes[idx1].sort_id != 0) {
prev_id = nodes[idx1].sort_id;
- str_append(prev_str,
- sort_header_get(program->sort_program[0], mail,
- nodes[idx1].seq));
+ sort_header_get(prev_str, program->sort_program[0], mail,
+ nodes[idx1].seq);
idx1++;
}
+ str = str_new(default_pool, 256);
for (i = idx1; i <= idx2; i++) {
- str = sort_header_get(program->sort_program[0], mail,
- nodes[i].seq);
-
- if (i == idx2 && strcmp(str, last_str) == 0)
+ T_FRAME(
+ sort_header_get(str, program->sort_program[0], mail,
+ nodes[i].seq);
+ );
+
+ if (i == idx2 && str_equals(str, last_str))
nodes[i].sort_id = last_id;
- else if (strcmp(str, str_c(prev_str)) == 0 && prev_id != 0)
+ else if (prev_id != 0 && str_equals(str, prev_str) == 0)
nodes[i].sort_id = prev_id;
else {
/* divide the available space so that each message gets
@@ -420,14 +395,16 @@ index_sort_add_ids_range(struct mail_sea
if (nodes[i].sort_id == last_id) {
/* we ran out of ID space. have to renumber
the IDs. */
+ str_free(&str);
return FALSE;
}
prev_id = nodes[i].sort_id;
str_truncate(prev_str, 0);
- str_append(prev_str, str);
- }
- }
+ str_append_str(prev_str, str);
+ }
+ }
+ str_free(&str);
return TRUE;
}
@@ -516,7 +493,6 @@ static void index_sort_preset_sort_ids(s
uint32_t last_seq)
{
struct mail_sort_node node;
- struct mail *mail;
uint32_t (*get_sort_id)(struct mail *);
switch (program->sort_program[0] & MAIL_SORT_MASK) {
@@ -534,114 +510,77 @@ static void index_sort_preset_sort_ids(s
}
/* add the missing nodes with their sort_ids */
- mail = program->temp_mail;
node.seq = array_count(&program->all_nodes) + 1;
for (; node.seq <= last_seq; node.seq++) {
- mail_set_seq(mail, node.seq);
- node.sort_id = get_sort_id(mail);
+ mail_set_seq(program->temp_mail, node.seq);
+ node.sort_id = get_sort_id(program->temp_mail);
+
i_assert(node.sort_id != 0);
array_append(&program->all_nodes, &node, 1);
}
-
- /* @UNSAFE: and sort them */
- memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
- static_node_cmp_context.program = program;
- static_node_cmp_context.mail = mail;
-
- qsort(array_idx_modifiable(&program->all_nodes, 0), last_seq,
- sizeof(struct mail_sort_node), sort_node_cmp);
-}
-
-static void index_sort_cache_seq(struct sort_cmp_context *ctx,
- enum mail_sort_type sort_type, uint32_t seq)
-{
- ctx->cache_seq = seq;
- ctx->cache_type = sort_type & MAIL_SORT_MASK;
-
- mail_set_seq(ctx->mail, seq);
More information about the dovecot-cvs
mailing list