dovecot-2.2: replicator: If sync fails, retry it in 5 minutes.

dovecot at dovecot.org
Sun Mar 24 17:22:02 EET 2013


details:   http://hg.dovecot.org/dovecot-2.2/rev/c4138d8db3dd
changeset: 16100:c4138d8db3dd
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Mar 24 17:21:49 2013 +0200
description:
replicator: If sync fails, retry it in 5 minutes.

diffstat:

 src/replication/replicator/replicator-queue.c |  50 ++++++++++++++++++++++----
 src/replication/replicator/replicator-queue.h |   6 ++-
 src/replication/replicator/replicator.c       |   5 ++-
 3 files changed, 50 insertions(+), 11 deletions(-)

diffs (144 lines):

diff -r 1af7c6f22793 -r c4138d8db3dd src/replication/replicator/replicator-queue.c
--- a/src/replication/replicator/replicator-queue.c	Sun Mar 24 17:04:22 2013 +0200
+++ b/src/replication/replicator/replicator-queue.c	Sun Mar 24 17:21:49 2013 +0200
@@ -30,6 +30,7 @@
 	ARRAY(struct replicator_sync_lookup) sync_lookups;
 
 	unsigned int full_sync_interval;
+	unsigned int failure_resync_interval;
 
 	void (*change_callback)(void *context);
 	void *change_context;
@@ -50,6 +51,18 @@
 			return -1;
 		if (user1->last_fast_sync > user2->last_fast_sync)
 			return 1;
+	} else if (user1->last_sync_failed != user2->last_sync_failed) {
+		/* resync failures first */
+		if (user1->last_sync_failed)
+			return -1;
+		else
+			return 1;
+	} else if (user1->last_sync_failed) {
+		/* both have failed. resync failures with fast-sync timestamp */
+		if (user1->last_fast_sync < user2->last_fast_sync)
+			return -1;
+		if (user1->last_fast_sync > user2->last_fast_sync)
+			return 1;
 	} else {
 		/* nothing to replicate, but do still periodic full syncs */
 		if (user1->last_full_sync < user2->last_full_sync)
@@ -60,12 +73,15 @@
 	return 0;
 }
 
-struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval)
+struct replicator_queue *
+replicator_queue_init(unsigned int full_sync_interval,
+		      unsigned int failure_resync_interval)
 {
 	struct replicator_queue *queue;
 
 	queue = i_new(struct replicator_queue, 1);
 	queue->full_sync_interval = full_sync_interval;
+	queue->failure_resync_interval = failure_resync_interval;
 	queue->user_queue = priorityq_init(user_priority_cmp, 1024);
 	hash_table_create(&queue->user_hash, default_pool, 1024,
 			  str_hash, strcmp);
@@ -182,13 +198,35 @@
 		queue->change_callback(queue->change_context);
 }
 
+static bool
+replicator_queue_can_sync_now(struct replicator_queue *queue,
+			      struct replicator_user *user,
+			      unsigned int *next_secs_r)
+{
+	time_t next_sync;
+
+	if (user->priority != REPLICATION_PRIORITY_NONE)
+		return TRUE;
+
+	if (user->last_sync_failed) {
+		next_sync = user->last_fast_sync +
+			queue->failure_resync_interval;
+	} else {
+		next_sync = user->last_full_sync + queue->full_sync_interval;
+	}
+	if (next_sync <= ioloop_time)
+		return TRUE;
+
+	*next_secs_r = next_sync - ioloop_time;
+	return FALSE;
+}
+
 struct replicator_user *
 replicator_queue_pop(struct replicator_queue *queue,
 		     unsigned int *next_secs_r)
 {
 	struct priorityq_item *item;
 	struct replicator_user *user;
-	time_t next_full_sync;
 
 	item = priorityq_peek(queue->user_queue);
 	if (item == NULL) {
@@ -197,12 +235,8 @@
 		return NULL;
 	}
 	user = (struct replicator_user *)item;
-
-	next_full_sync = user->last_full_sync + queue->full_sync_interval;
-	if (user->priority == REPLICATION_PRIORITY_NONE &&
-	    next_full_sync > ioloop_time) {
-		/* we don't want to do a full sync yet */
-		*next_secs_r = next_full_sync - ioloop_time;
+	if (!replicator_queue_can_sync_now(queue, user, next_secs_r)) {
+		/* we don't want to sync the user yet */
 		return NULL;
 	}
 	priorityq_remove(queue->user_queue, &user->item);
diff -r 1af7c6f22793 -r c4138d8db3dd src/replication/replicator/replicator-queue.h
--- a/src/replication/replicator/replicator-queue.h	Sun Mar 24 17:04:22 2013 +0200
+++ b/src/replication/replicator/replicator-queue.h	Sun Mar 24 17:21:49 2013 +0200
@@ -12,7 +12,7 @@
 	char *state;
 	/* last time this user's state was updated */
 	time_t last_update;
-	/* last_fast_run is always >= last_full_run. */
+	/* last_fast_sync is always >= last_full_sync. */
 	time_t last_fast_sync, last_full_sync;
 
 	enum replication_priority priority;
@@ -24,7 +24,9 @@
 
 typedef void replicator_sync_callback_t(bool success, void *context);
 
-struct replicator_queue *replicator_queue_init(unsigned int full_sync_interval);
+struct replicator_queue *
+replicator_queue_init(unsigned int full_sync_interval,
+		      unsigned int failure_resync_interval);
 void replicator_queue_deinit(struct replicator_queue **queue);
 
 /* Call the specified callback when data is added/removed/moved in queue
diff -r 1af7c6f22793 -r c4138d8db3dd src/replication/replicator/replicator.c
--- a/src/replication/replicator/replicator.c	Sun Mar 24 17:04:22 2013 +0200
+++ b/src/replication/replicator/replicator.c	Sun Mar 24 17:21:49 2013 +0200
@@ -14,6 +14,8 @@
 
 #define REPLICATOR_AUTH_SERVICE_NAME "replicator"
 #define REPLICATOR_DB_DUMP_INTERVAL_MSECS (1000*60*15)
+/* if syncing fails, try again in 5 minutes */
+#define REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS (60*5)
 #define REPLICATOR_DB_FNAME "replicator.db"
 
 static struct replicator_queue *queue;
@@ -79,7 +81,8 @@
 	sets = master_service_settings_get_others(master_service);
 	set = sets[0];
 
-	queue = replicator_queue_init(set->replication_full_sync_interval);
+	queue = replicator_queue_init(set->replication_full_sync_interval,
+				      REPLICATOR_FAILURE_RESYNC_INTERVAL_SECS);
 	replication_add_users(queue);
 	to_dump = timeout_add(REPLICATOR_DB_DUMP_INTERVAL_MSECS,
 			      replicator_dump_timeout, (void *)NULL);


More information about the dovecot-cvs mailing list