dovecot: Moved back to non-indexed THREAD code. It'll be fixed a...

dovecot at dovecot.org dovecot at dovecot.org
Tue Aug 7 14:54:59 EEST 2007


details:   http://hg.dovecot.org/dovecot/rev/ad1b948c5fa2
changeset: 6229:ad1b948c5fa2
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Aug 07 14:54:55 2007 +0300
description:
Moved back to non-indexed THREAD code. It'll be fixed after v1.1. Also
X-REFERENCES2 threading is gone for now.

diffstat:

5 files changed, 865 insertions(+), 2123 deletions(-)
configure.in           |    2 
src/imap/cmd-thread.c  |    2 
src/imap/imap-thread.c | 2976 +++++++++++++-----------------------------------
src/imap/imap-thread.h |    6 
src/imap/main.c        |    2 

diffs (truncated from 3150 to 300 lines):

diff -r 0b4ec1687f3c -r ad1b948c5fa2 configure.in
--- a/configure.in	Tue Aug 07 14:47:26 2007 +0300
+++ b/configure.in	Tue Aug 07 14:54:55 2007 +0300
@@ -1990,7 +1990,7 @@ dnl ** capabilities
 dnl ** capabilities
 dnl **
 
-capability="IMAP4rev1 SASL-IR SORT THREAD=REFERENCES THREAD=X-REFERENCES2 MULTIAPPEND UNSELECT LITERAL+ IDLE CHILDREN NAMESPACE LOGIN-REFERRALS UIDPLUS LIST-EXTENDED"
+capability="IMAP4rev1 SASL-IR SORT THREAD=REFERENCES MULTIAPPEND UNSELECT LITERAL+ IDLE CHILDREN NAMESPACE LOGIN-REFERRALS UIDPLUS LIST-EXTENDED"
 AC_DEFINE_UNQUOTED(CAPABILITY_STRING, "$capability", IMAP capabilities)
 
 CFLAGS="$CFLAGS $EXTRA_CFLAGS"
diff -r 0b4ec1687f3c -r ad1b948c5fa2 src/imap/cmd-thread.c
--- a/src/imap/cmd-thread.c	Tue Aug 07 14:47:26 2007 +0300
+++ b/src/imap/cmd-thread.c	Tue Aug 07 14:54:55 2007 +0300
@@ -39,8 +39,6 @@ bool cmd_thread(struct client_command_co
 	str = IMAP_ARG_STR(args);
 	if (strcasecmp(str, "REFERENCES") == 0)
 		threading = MAIL_THREAD_REFERENCES;
-	else if (strcasecmp(str, "X-REFERENCES2") == 0)
-		threading = MAIL_THREAD_REFERENCES2;
 	else if (strcasecmp(str, "ORDEREDSUBJECT") == 0) {
 		client_send_command_error(cmd,
 			"ORDEREDSUBJECT threading is currently not supported.");
diff -r 0b4ec1687f3c -r ad1b948c5fa2 src/imap/imap-thread.c
--- a/src/imap/imap-thread.c	Tue Aug 07 14:47:26 2007 +0300
+++ b/src/imap/imap-thread.c	Tue Aug 07 14:54:55 2007 +0300
@@ -1,1366 +1,637 @@
-/* Copyright (C) 2002-2006 Timo Sirainen */
-
-/* Implementation of draft-ietf-imapext-thread-12 threading algorithm
-
-   Message threads are permanently stored using mail-hash API. The links
-   are built according to normal threading rules. Base subject grouping
-   however isn't saved, so it needs to be done the slow way.
-
-   We do this permanent storing and optimization only when we're threading
-   all messages, which is what pretty much all the webmails do. Otherwise
-   we'd need to have separate thread trees for each search query, which
-   isn't practical.
-
-   The nodes are stored sorted by their sent_date/UID as specified by the
-   threading rules, so no further sorting is required to be done. Dummy nodes
-   are sorted by their children, and since their children are also sorted, it
-   practically means that they need to be moved whenever their first child
-   is changed.
-
-   Adding new messages to the hash is easy and fast. Removing messages
-   however might cause changes all over the thread tree. Luckily this is
-   rare so we can just optimize the common cases. This is done with reference
-   counts:
-
-   A node is created for each seen Message-ID. We also store the dummy ones
-   for which no actual message exists. Each time a Message-ID is seen in the
-   In-Reply-To or References headers, the Message-ID node's reference count
-   is increased.
-
-   Expunging then decreases reference counts for each Message-ID. There are
-   two rare problematic cases when expunging:
-
-   1) Duplicate Message-IDs
-   2) Broken parent-child relationships:
-        a) different messages describe them differently in their References
-	   headers
-	b) loops
-
-   For these cases expunging a message affecting these may cause larger
-   thread reorganizations. We mark these with expunge_rebuilds flag, so that
-   if the problematic message is expunged, we'll just rebuild everything.
-
-   When a message is expunged, either its reference count drops to zero in
-   which case it's removed completely, otherwise it's turned into a dummy
-   node.
-
-   Typically when reference count drops to zero it means it the node has no
-   children. One special case is however a thread with:
-     [1:dummy] [2:dummy] [3:ref 1, 2] [4:ref 2]
-   When 3 is removed, 1's refcount drops to zero but 2 is still referenced
-   by 4. In this case the 2's parent must be updated.
-
-   When we open the mail-hash, we check that no messages have been expunged
-   from mailbox which haven't also been removed from the hash. If they have,
-   we rebuild the thread tree. Otherwise we add the new messages to the hash
-   and then send the results to client.
-*/
+/* Copyright (C) 2002 Timo Sirainen */
+
+/*
+ * Merge sort code in sort_nodes() is copyright 2001 Simon Tatham.
+ * 
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Implementation of draft-ietf-imapext-thread-12 threading algorithm */
 
 #include "common.h"
-#include "array.h"
-#include "crc32.h"
+#include "hash.h"
 #include "ostream.h"
 #include "str.h"
-#include "message-id.h"
+#include "rfc822-parser.h"
 #include "imap-base-subject.h"
 #include "mail-storage.h"
-#include "mail-search.h"
 #include "imap-thread.h"
 
-/* FIXME: the mailbox accessing API needs some cleaning up. we shouldn't be
-   including all kinds of private headers */
-#include "../lib-storage/index/index-storage.h"
-#include "mail-hash.h"
-#include "mail-index-private.h"
-#include "mail-index-sync-private.h"
-
 #include <stdlib.h>
-
-#define IMAP_THREAD_CONTEXT(obj) \
-	MODULE_CONTEXT(obj, imap_thread_storage_module)
 
 /* how much memory to allocate initially. these are very rough
    approximations. */
-#define APPROX_MSG_EXTRA_COUNT 10
+#define APPROX_MSG_COUNT 128
 #define APPROX_MSGID_SIZE 45
 
 /* Try to buffer this much data before sending it to output stream. */
 #define OUTPUT_BUF_SIZE 2048
 
-#define HDR_MESSAGE_ID "message-id"
-#define HDR_IN_REPLY_TO "in-reply-to"
-#define HDR_REFERENCES "references"
-#define HDR_SUBJECT "subject"
-
-struct msgid_rec {
-	const char *msgid;
-	uint32_t msgid_crc32;
+#define NODE_IS_DUMMY(node) ((node)->id == 0)
+#define NODE_HAS_PARENT(ctx, node) \
+	((node)->parent != NULL && (node)->parent != &(ctx)->root_node)
+
+struct root_info {
+	char *base_subject;
+	unsigned int reply:1;
+	unsigned int sorted:1;
 };
 
-struct mail_thread_rec {
-	struct mail_hash_record rec;
-
-	uint32_t refcount:31;
-	uint32_t expunge_rebuilds:1;
-
-	uint32_t msgid_crc32;
-	uint32_t sent_date;
-
-	uint32_t parent_idx;
-	uint32_t first_child_idx;
-	uint32_t next_idx;
+struct node {
+	struct node *parent, *first_child, *next;
+
+	unsigned int id;
+	time_t sent_date;
+
+	union {
+		char *msgid;
+		struct root_info *info;
+	} u;
 };
-
-struct mail_thread_moved_rec {
-	struct mail_thread_rec rec;
-	/* first_child_idx and its siblings are in moved_recs */
-	unsigned int moved_children:1;
-};
-
-struct mail_thread_root_rec {
-	uint32_t idx;
-	uint32_t uid;
-	uint32_t sent_date;
-
-	/* a linked list of roots which belong to the same thread */
-	struct mail_thread_root_rec *next;
-
-	/* another record has this record in its next-pointer.
-	   this record isn't a root anymore. */
-	unsigned int nonroot:1;
-	/* idx points to moved_recs */
-	unsigned int moved:1;
-	/* subject contained a Re: or Fwd: */
-	unsigned int reply:1;
-};
-#define ROOT_REC_IS_DUMMY(rec) \
-	((rec)->uid == 0)
 
 struct thread_context {
 	struct mail_search_context *search_ctx;
 	struct mailbox_transaction_context *t;
 	struct mailbox *box;
 	struct ostream *output;
-	enum mail_thread_type thread_type;
-
-	struct mail *tmp_mail;
-	struct mail_thread_rec tmp_rec;
-
-	struct mail_search_arg tmp_search_arg;
-	struct mail_search_seqset seqset;
-
-	/* Hash record idx -> Message-ID */
-	ARRAY_DEFINE(msgid_map, const char *);
-	pool_t msgid_pool;
-	struct mail_hash *msgid_hash;
-
-	pool_t subject_pool;
+	struct mail *mail;
+
+	pool_t pool;
+	pool_t temp_pool;
+
+	struct hash_table *msgid_hash;
 	struct hash_table *subject_hash;
-	ARRAY_DEFINE(roots, struct mail_thread_root_rec *);
-	ARRAY_DEFINE(moved_recs, struct mail_thread_moved_rec);
-	uint32_t *alt_dates;
-
-	unsigned int cmp_match_count;
-	uint32_t cmp_last_idx;
-
-	unsigned int id_is_uid:1;
-	unsigned int failed:1;
+
+	struct node root_node;
+	size_t root_count; /* not exact after prune_dummy_messages() */
+
+	bool id_is_uid;
 };
 
-struct imap_thread_mailbox {
-	union mailbox_module_context module_ctx;
-	struct mail_hash *msgid_hash;
-
-	/* set only temporarily while needed */
-	struct thread_context *ctx;
-};
-
-static void (*next_hook_mailbox_opened)(struct mailbox *box);
-
-static MODULE_CONTEXT_DEFINE_INIT(imap_thread_storage_module,
-				  &mail_storage_module_register);
-
-static void imap_thread_hash_init(struct mailbox *box, bool create);
-
-static int mail_thread_input(struct thread_context *ctx, struct mail *mail);
-static int mail_thread_finish(struct thread_context *ctx);
-
-static int unlink_child(struct thread_context *ctx, uint32_t child_idx,
-			uint32_t new_parent_idx);
-
-static void mail_thread_deinit(struct imap_thread_mailbox *tbox,
-			       struct thread_context *ctx)
-{
-	i_free(ctx->alt_dates);
-
-	if (ctx->msgid_hash != tbox->msgid_hash)
-		mail_hash_free(&ctx->msgid_hash);
-	else
-		mail_hash_unlock(ctx->msgid_hash);
-
-	if (ctx->subject_hash != NULL) {
+static void mail_thread_input(struct thread_context *ctx, struct mail *mail);
+static void mail_thread_finish(struct thread_context *ctx);
+
+static void mail_thread_deinit(struct thread_context *ctx)
+{
+	if (ctx->msgid_hash != NULL)
+		hash_destroy(ctx->msgid_hash);
+	if (ctx->subject_hash != NULL)
 		hash_destroy(ctx->subject_hash);
-		pool_unref(ctx->subject_pool);
-	}


More information about the dovecot-cvs mailing list