dovecot-2.2: doveadm: Added deduplicate command.

dovecot at dovecot.org dovecot at dovecot.org
Tue Jun 18 17:05:36 EEST 2013


details:   http://hg.dovecot.org/dovecot-2.2/rev/3683d7bff095
changeset: 16535:3683d7bff095
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Jun 18 17:05:20 2013 +0300
description:
doveadm: Added deduplicate command.
By default it deduplicates only by GUIDs. With -m parameter it deduplicates
by Message-Id: header.

diffstat:

 src/doveadm/Makefile.am                |    1 +
 src/doveadm/doveadm-mail-deduplicate.c |  203 +++++++++++++++++++++++++++++++++
 src/doveadm/doveadm-mail.c             |    1 +
 src/doveadm/doveadm-mail.h             |    1 +
 4 files changed, 206 insertions(+), 0 deletions(-)

diffs (240 lines):

diff -r 5e51c5545029 -r 3683d7bff095 src/doveadm/Makefile.am
--- a/src/doveadm/Makefile.am	Tue Jun 18 15:14:42 2013 +0300
+++ b/src/doveadm/Makefile.am	Tue Jun 18 17:05:20 2013 +0300
@@ -62,6 +62,7 @@
 	doveadm-mail.c \
 	doveadm-mail-altmove.c \
 	doveadm-mail-batch.c \
+	doveadm-mail-deduplicate.c \
 	doveadm-mail-expunge.c \
 	doveadm-mail-fetch.c \
 	doveadm-mail-flags.c \
diff -r 5e51c5545029 -r 3683d7bff095 src/doveadm/doveadm-mail-deduplicate.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/doveadm/doveadm-mail-deduplicate.c	Tue Jun 18 17:05:20 2013 +0300
@@ -0,0 +1,203 @@
+/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "hash.h"
+#include "mail-storage.h"
+#include "mail-search-build.h"
+#include "doveadm-mailbox-list-iter.h"
+#include "doveadm-mail-iter.h"
+#include "doveadm-mail.h"
+
+struct uidlist {
+	struct uidlist *next; /* earlier mail with the same key, or NULL */
+	uint32_t uid; /* UID of the mail this entry stands for */
+};
+
+struct deduplicate_cmd_context {
+	struct doveadm_mail_cmd_context ctx; /* base context; must stay first */
+	bool by_msgid; /* -m given: key on Message-ID: instead of GUID */
+};
+
+/* Expunges every mail in uidlist except its last entry. Returns 0 or -1. */
+static int cmd_deduplicate_uidlist(struct mailbox *box, struct uidlist *uidlist)
+{
+	struct mailbox_transaction_context *t;
+	struct mail_search_context *search;
+	struct mail_search_args *args;
+	struct mail_search_arg *uid_arg;
+	struct mail *mail;
+	ARRAY_TYPE(seq_range) expunge_uids;
+	int ret;
+
+	/* the list is newest-first: its last entry is the oldest mail, which
+	   is kept. a single-entry list has nothing to expunge. */
+	if (uidlist->next == NULL)
+		return 0;
+
+	t_array_init(&expunge_uids, 8);
+	do {
+		seq_range_array_add(&expunge_uids, uidlist->uid);
+		uidlist = uidlist->next;
+	} while (uidlist->next != NULL);
+
+	args = mail_search_build_init();
+	uid_arg = mail_search_build_add(args, SEARCH_UIDSET);
+	uid_arg->value.seqset = expunge_uids;
+	t = mailbox_transaction_begin(box, 0);
+	search = mailbox_search_init(t, args, NULL, 0, NULL);
+	mail_search_args_unref(&args);
+	while (mailbox_search_next(search, &mail))
+		mail_expunge(mail);
+	ret = mailbox_search_deinit(&search) < 0 ? -1 : 0;
+	if (mailbox_transaction_commit(&t) < 0)
+		ret = -1;
+	return ret;
+}
+
+/* Deduplicate one mailbox. Mails matched by search_args are grouped by GUID,
+   or by their Message-ID: header when by_msgid is set; in each group all
+   mails except the first one iterated are expunged. Mails with a missing or
+   empty key are left alone. Returns 0 on success, -1 on failure. */
+static int
+cmd_deduplicate_box(struct doveadm_mail_cmd_context *_ctx,
+		    const struct mailbox_info *info,
+		    struct mail_search_args *search_args)
+{
+	struct deduplicate_cmd_context *ctx =
+		(struct deduplicate_cmd_context *)_ctx;
+	const char *key_name = ctx->by_msgid ? "Message-ID" : "GUID";
+	struct doveadm_mail_iter *iter;
+	struct hash_iterate_context *hash_iter;
+	struct mailbox *box;
+	struct mail *mail;
+	enum mail_error error;
+	pool_t pool;
+	HASH_TABLE(const char *, struct uidlist *) hash;
+	const char *key, *errstr;
+	struct uidlist *value;
+	int ret = 0, lookup_ret;
+
+	if (doveadm_mail_iter_init(_ctx, info, search_args, 0, NULL,
+				   &iter) < 0)
+		return -1;
+
+	pool = pool_alloconly_create("deduplicate", 10240);
+	hash_table_create(&hash, pool, 0, str_hash, strcmp);
+	while (doveadm_mail_iter_next(iter, &mail)) {
+		lookup_ret = ctx->by_msgid ?
+			mail_get_first_header(mail, "Message-ID", &key) :
+			mail_get_special(mail, MAIL_FETCH_GUID, &key);
+		if (lookup_ret < 0) {
+			/* "box" isn't assigned until the iterator is
+			   deinitialized below, so the error has to be read
+			   via the mail's own mailbox. */
+			errstr = mailbox_get_last_error(mail->box, &error);
+			if (error == MAIL_ERROR_NOTFOUND)
+				continue;
+			i_error("Couldn't lookup %s: for UID=%u: %s",
+				key_name, mail->uid, errstr);
+			ret = -1;
+			break;
+		}
+		if (key != NULL && *key != '\0') {
+			/* prepend to the key's list, so the first mail seen
+			   ends up as the last list entry */
+			value = p_new(pool, struct uidlist, 1);
+			value->uid = mail->uid;
+			value->next = hash_table_lookup(hash, key);
+
+			if (value->next == NULL) {
+				key = p_strdup(pool, key);
+				hash_table_insert(hash, key, value);
+			} else {
+				hash_table_update(hash, key, value);
+			}
+		}
+	}
+
+	if (doveadm_mail_iter_deinit_keep_box(&iter, &box) < 0)
+		ret = -1;
+
+	if (ret == 0) {
+		hash_iter = hash_table_iterate_init(hash);
+		while (hash_table_iterate(hash_iter, hash, &key, &value)) {
+			T_BEGIN {
+				if (cmd_deduplicate_uidlist(box, value) < 0)
+					ret = -1;
+			} T_END;
+		}
+		hash_table_iterate_deinit(&hash_iter);
+	}
+
+	hash_table_destroy(&hash);
+	pool_unref(&pool);
+
+	if (mailbox_sync(box, 0) < 0) {
+		doveadm_mail_failed_mailbox(_ctx, box);
+		ret = -1;
+	}
+	mailbox_free(&box);
+	return ret;
+}
+
+/* Deduplicates every mailbox returned by the mailbox list iterator.
+   Returns 0 on success, -1 if any mailbox or the listing itself failed. */
+static int
+cmd_deduplicate_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user)
+{
+	struct doveadm_mailbox_list_iter *list;
+	const struct mailbox_info *cur;
+	bool failed = FALSE;
+
+	list = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args,
+					      MAILBOX_LIST_ITER_NO_AUTO_BOXES |
+					      MAILBOX_LIST_ITER_RETURN_NO_FLAGS);
+	while ((cur = doveadm_mailbox_list_iter_next(list)) != NULL) T_BEGIN {
+		if (cmd_deduplicate_box(ctx, cur, ctx->search_args) < 0)
+			failed = TRUE;
+	} T_END;
+	if (doveadm_mailbox_list_iter_deinit(&list) < 0)
+		failed = TRUE;
+	return failed ? -1 : 0;
+}
+
+/* Builds the search args from args; with no args, invokes the help. */
+static void cmd_deduplicate_init(struct doveadm_mail_cmd_context *ctx,
+				 const char *const args[])
+{
+	if (*args == NULL)
+		doveadm_mail_help_name("deduplicate");
+	ctx->search_args = doveadm_mail_build_search_args(args);
+}
+
+/* Option parser callback: 'm' selects deduplication by Message-ID: header.
+   Returns FALSE for any other option character. */
+static bool
+cmd_deduplicate_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c)
+{
+	struct deduplicate_cmd_context *ctx =
+		(struct deduplicate_cmd_context *)_ctx;
+
+	if (c != 'm') {
+		/* not one of our options */
+		return FALSE;
+	}
+	ctx->by_msgid = TRUE;
+	return TRUE;
+}
+
+/* Allocates the command context and sets its getopt string and callbacks. */
+static struct doveadm_mail_cmd_context *cmd_deduplicate_alloc(void)
+{
+	struct deduplicate_cmd_context *dedup =
+		doveadm_mail_cmd_alloc(struct deduplicate_cmd_context);
+	dedup->ctx.getopt_args = "m";
+	dedup->ctx.v.parse_arg = cmd_deduplicate_parse_arg;
+	dedup->ctx.v.init = cmd_deduplicate_init;
+	dedup->ctx.v.run = cmd_deduplicate_run;
+	return &dedup->ctx;
+}
+
+struct doveadm_mail_cmd cmd_deduplicate = { /* registered in doveadm-mail.c */
+	cmd_deduplicate_alloc, "deduplicate", "[-m] <search query>"
+};
diff -r 5e51c5545029 -r 3683d7bff095 src/doveadm/doveadm-mail.c
--- a/src/doveadm/doveadm-mail.c	Tue Jun 18 15:14:42 2013 +0300
+++ b/src/doveadm/doveadm-mail.c	Tue Jun 18 17:05:20 2013 +0300
@@ -699,6 +699,7 @@
 	&cmd_index,
 	&cmd_altmove,
 	&cmd_copy,
+	&cmd_deduplicate,
 	&cmd_move,
 	&cmd_mailbox_list,
 	&cmd_mailbox_create,
diff -r 5e51c5545029 -r 3683d7bff095 src/doveadm/doveadm-mail.h
--- a/src/doveadm/doveadm-mail.h	Tue Jun 18 15:14:42 2013 +0300
+++ b/src/doveadm/doveadm-mail.h	Tue Jun 18 17:05:20 2013 +0300
@@ -145,6 +145,7 @@
 extern struct doveadm_mail_cmd cmd_index;
 extern struct doveadm_mail_cmd cmd_altmove;
 extern struct doveadm_mail_cmd cmd_copy;
+extern struct doveadm_mail_cmd cmd_deduplicate;
 extern struct doveadm_mail_cmd cmd_move;
 extern struct doveadm_mail_cmd cmd_mailbox_list;
 extern struct doveadm_mail_cmd cmd_mailbox_create;


More information about the dovecot-cvs mailing list