dovecot: Moved back to non-indexed THREAD code. It'll be fixed a...
dovecot at dovecot.org
dovecot at dovecot.org
Tue Aug 7 14:54:59 EEST 2007
details: http://hg.dovecot.org/dovecot/rev/ad1b948c5fa2
changeset: 6229:ad1b948c5fa2
user: Timo Sirainen <tss at iki.fi>
date: Tue Aug 07 14:54:55 2007 +0300
description:
Moved back to non-indexed THREAD code. It'll be fixed after v1.1. Also
X-REFERENCES2 threading is gone for now.
diffstat:
5 files changed, 865 insertions(+), 2123 deletions(-)
configure.in | 2
src/imap/cmd-thread.c | 2
src/imap/imap-thread.c | 2976 +++++++++++++-----------------------------------
src/imap/imap-thread.h | 6
src/imap/main.c | 2
diffs (truncated from 3150 to 300 lines):
diff -r 0b4ec1687f3c -r ad1b948c5fa2 configure.in
--- a/configure.in Tue Aug 07 14:47:26 2007 +0300
+++ b/configure.in Tue Aug 07 14:54:55 2007 +0300
@@ -1990,7 +1990,7 @@ dnl ** capabilities
dnl ** capabilities
dnl **
-capability="IMAP4rev1 SASL-IR SORT THREAD=REFERENCES THREAD=X-REFERENCES2 MULTIAPPEND UNSELECT LITERAL+ IDLE CHILDREN NAMESPACE LOGIN-REFERRALS UIDPLUS LIST-EXTENDED"
+capability="IMAP4rev1 SASL-IR SORT THREAD=REFERENCES MULTIAPPEND UNSELECT LITERAL+ IDLE CHILDREN NAMESPACE LOGIN-REFERRALS UIDPLUS LIST-EXTENDED"
AC_DEFINE_UNQUOTED(CAPABILITY_STRING, "$capability", IMAP capabilities)
CFLAGS="$CFLAGS $EXTRA_CFLAGS"
diff -r 0b4ec1687f3c -r ad1b948c5fa2 src/imap/cmd-thread.c
--- a/src/imap/cmd-thread.c Tue Aug 07 14:47:26 2007 +0300
+++ b/src/imap/cmd-thread.c Tue Aug 07 14:54:55 2007 +0300
@@ -39,8 +39,6 @@ bool cmd_thread(struct client_command_co
str = IMAP_ARG_STR(args);
if (strcasecmp(str, "REFERENCES") == 0)
threading = MAIL_THREAD_REFERENCES;
- else if (strcasecmp(str, "X-REFERENCES2") == 0)
- threading = MAIL_THREAD_REFERENCES2;
else if (strcasecmp(str, "ORDEREDSUBJECT") == 0) {
client_send_command_error(cmd,
"ORDEREDSUBJECT threading is currently not supported.");
diff -r 0b4ec1687f3c -r ad1b948c5fa2 src/imap/imap-thread.c
--- a/src/imap/imap-thread.c Tue Aug 07 14:47:26 2007 +0300
+++ b/src/imap/imap-thread.c Tue Aug 07 14:54:55 2007 +0300
@@ -1,1366 +1,637 @@
-/* Copyright (C) 2002-2006 Timo Sirainen */
-
-/* Implementation of draft-ietf-imapext-thread-12 threading algorithm
-
- Message threads are permanently stored using mail-hash API. The links
- are built according to normal threading rules. Base subject grouping
- however isn't saved, so it needs to be done the slow way.
-
- We do this permanent storing and optimization only when we're threading
- all messages, which is what pretty much all the webmails do. Otherwise
- we'd need to have separate thread trees for each search query, which
- isn't practical.
-
- The nodes are stored sorted by their sent_date/UID as specified by the
- threading rules, so no further sorting is required to be done. Dummy nodes
- are sorted by their children, and since their children are also sorted, it
- practically means that they need to be moved whenever their first child
- is changed.
-
- Adding new messages to the hash is easy and fast. Removing messages
- however might cause changes all over the thread tree. Luckily this is
- rare so we can just optimize the common cases. This is done with reference
- counts:
-
- A node is created for each seen Message-ID. We also store the dummy ones
- for which no actual message exists. Each time a Message-ID is seen in the
- In-Reply-To or References headers, the Message-ID node's reference count
- is increased.
-
- Expunging then decreases reference counts for each Message-ID. There are
- two rare problematic cases when expunging:
-
- 1) Duplicate Message-IDs
- 2) Broken parent-child relationships:
- a) different messages describe them differently in their References
- headers
- b) loops
-
- For these cases expunging a message affecting these may cause larger
- thread reorganizations. We mark these with expunge_rebuilds flag, so that
- if the problematic message is expunged, we'll just rebuild everything.
-
- When a message is expunged, either its reference count drops to zero in
- which case it's removed completely, otherwise it's turned into a dummy
- node.
-
- Typically when reference count drops to zero it means it the node has no
- children. One special case is however a thread with:
- [1:dummy] [2:dummy] [3:ref 1, 2] [4:ref 2]
- When 3 is removed, 1's refcount drops to zero but 2 is still referenced
- by 4. In this case the 2's parent must be updated.
-
- When we open the mail-hash, we check that no messages have been expunged
- from mailbox which haven't also been removed from the hash. If they have,
- we rebuild the thread tree. Otherwise we add the new messages to the hash
- and then send the results to client.
-*/
+/* Copyright (C) 2002 Timo Sirainen */
+
+/*
+ * Merge sort code in sort_nodes() is copyright 2001 Simon Tatham.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Implementation of draft-ietf-imapext-thread-12 threading algorithm */
#include "common.h"
-#include "array.h"
-#include "crc32.h"
+#include "hash.h"
#include "ostream.h"
#include "str.h"
-#include "message-id.h"
+#include "rfc822-parser.h"
#include "imap-base-subject.h"
#include "mail-storage.h"
-#include "mail-search.h"
#include "imap-thread.h"
-/* FIXME: the mailbox accessing API needs some cleaning up. we shouldn't be
- including all kinds of private headers */
-#include "../lib-storage/index/index-storage.h"
-#include "mail-hash.h"
-#include "mail-index-private.h"
-#include "mail-index-sync-private.h"
-
#include <stdlib.h>
-
-#define IMAP_THREAD_CONTEXT(obj) \
- MODULE_CONTEXT(obj, imap_thread_storage_module)
/* how much memory to allocate initially. these are very rough
approximations. */
-#define APPROX_MSG_EXTRA_COUNT 10
+#define APPROX_MSG_COUNT 128
#define APPROX_MSGID_SIZE 45
/* Try to buffer this much data before sending it to output stream. */
#define OUTPUT_BUF_SIZE 2048
-#define HDR_MESSAGE_ID "message-id"
-#define HDR_IN_REPLY_TO "in-reply-to"
-#define HDR_REFERENCES "references"
-#define HDR_SUBJECT "subject"
-
-struct msgid_rec {
- const char *msgid;
- uint32_t msgid_crc32;
+#define NODE_IS_DUMMY(node) ((node)->id == 0)
+#define NODE_HAS_PARENT(ctx, node) \
+ ((node)->parent != NULL && (node)->parent != &(ctx)->root_node)
+
+struct root_info {
+ char *base_subject;
+ unsigned int reply:1;
+ unsigned int sorted:1;
};
-struct mail_thread_rec {
- struct mail_hash_record rec;
-
- uint32_t refcount:31;
- uint32_t expunge_rebuilds:1;
-
- uint32_t msgid_crc32;
- uint32_t sent_date;
-
- uint32_t parent_idx;
- uint32_t first_child_idx;
- uint32_t next_idx;
+struct node {
+ struct node *parent, *first_child, *next;
+
+ unsigned int id;
+ time_t sent_date;
+
+ union {
+ char *msgid;
+ struct root_info *info;
+ } u;
};
-
-struct mail_thread_moved_rec {
- struct mail_thread_rec rec;
- /* first_child_idx and its siblings are in moved_recs */
- unsigned int moved_children:1;
-};
-
-struct mail_thread_root_rec {
- uint32_t idx;
- uint32_t uid;
- uint32_t sent_date;
-
- /* a linked list of roots which belong to the same thread */
- struct mail_thread_root_rec *next;
-
- /* another record has this record in its next-pointer.
- this record isn't a root anymore. */
- unsigned int nonroot:1;
- /* idx points to moved_recs */
- unsigned int moved:1;
- /* subject contained a Re: or Fwd: */
- unsigned int reply:1;
-};
-#define ROOT_REC_IS_DUMMY(rec) \
- ((rec)->uid == 0)
struct thread_context {
struct mail_search_context *search_ctx;
struct mailbox_transaction_context *t;
struct mailbox *box;
struct ostream *output;
- enum mail_thread_type thread_type;
-
- struct mail *tmp_mail;
- struct mail_thread_rec tmp_rec;
-
- struct mail_search_arg tmp_search_arg;
- struct mail_search_seqset seqset;
-
- /* Hash record idx -> Message-ID */
- ARRAY_DEFINE(msgid_map, const char *);
- pool_t msgid_pool;
- struct mail_hash *msgid_hash;
-
- pool_t subject_pool;
+ struct mail *mail;
+
+ pool_t pool;
+ pool_t temp_pool;
+
+ struct hash_table *msgid_hash;
struct hash_table *subject_hash;
- ARRAY_DEFINE(roots, struct mail_thread_root_rec *);
- ARRAY_DEFINE(moved_recs, struct mail_thread_moved_rec);
- uint32_t *alt_dates;
-
- unsigned int cmp_match_count;
- uint32_t cmp_last_idx;
-
- unsigned int id_is_uid:1;
- unsigned int failed:1;
+
+ struct node root_node;
+ size_t root_count; /* not exact after prune_dummy_messages() */
+
+ bool id_is_uid;
};
-struct imap_thread_mailbox {
- union mailbox_module_context module_ctx;
- struct mail_hash *msgid_hash;
-
- /* set only temporarily while needed */
- struct thread_context *ctx;
-};
-
-static void (*next_hook_mailbox_opened)(struct mailbox *box);
-
-static MODULE_CONTEXT_DEFINE_INIT(imap_thread_storage_module,
- &mail_storage_module_register);
-
-static void imap_thread_hash_init(struct mailbox *box, bool create);
-
-static int mail_thread_input(struct thread_context *ctx, struct mail *mail);
-static int mail_thread_finish(struct thread_context *ctx);
-
-static int unlink_child(struct thread_context *ctx, uint32_t child_idx,
- uint32_t new_parent_idx);
-
-static void mail_thread_deinit(struct imap_thread_mailbox *tbox,
- struct thread_context *ctx)
-{
- i_free(ctx->alt_dates);
-
- if (ctx->msgid_hash != tbox->msgid_hash)
- mail_hash_free(&ctx->msgid_hash);
- else
- mail_hash_unlock(ctx->msgid_hash);
-
- if (ctx->subject_hash != NULL) {
+static void mail_thread_input(struct thread_context *ctx, struct mail *mail);
+static void mail_thread_finish(struct thread_context *ctx);
+
+static void mail_thread_deinit(struct thread_context *ctx)
+{
+ if (ctx->msgid_hash != NULL)
+ hash_destroy(ctx->msgid_hash);
+ if (ctx->subject_hash != NULL)
hash_destroy(ctx->subject_hash);
- pool_unref(ctx->subject_pool);
- }
More information about the dovecot-cvs
mailing list