dovecot-2.2: dsync: Fixed again waiting for remote process wait ...

dovecot at dovecot.org dovecot at dovecot.org
Thu Aug 27 10:34:39 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/42d4da9ee7a9
changeset: 19020:42d4da9ee7a9
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Aug 27 12:33:47 2015 +0200
description:
dsync: Fixed again waiting for remote process wait to die.
We can't rely on stderr getting closed. It doesn't happen if the remote
process crashes. Now waiting for SIGCHLD in ioloop should solve this and
still log all the error messages at exit.

diffstat:

 src/doveadm/doveadm-dsync.c |  55 ++++++++++++++++++++++++--------------------
 1 files changed, 30 insertions(+), 25 deletions(-)

diffs (141 lines):

diff -r ec6e672a6e32 -r 42d4da9ee7a9 src/doveadm/doveadm-dsync.c
--- a/src/doveadm/doveadm-dsync.c	Thu Aug 27 10:39:26 2015 +0200
+++ b/src/doveadm/doveadm-dsync.c	Thu Aug 27 12:33:47 2015 +0200
@@ -5,6 +5,7 @@
 #include "array.h"
 #include "execv-const.h"
 #include "fd-set-nonblock.h"
+#include "child-wait.h"
 #include "istream.h"
 #include "ostream.h"
 #include "iostream-ssl.h"
@@ -72,6 +73,8 @@
 	const char *local_location;
 	pid_t remote_pid;
 	const char *const *remote_cmd_args;
+	struct child_wait *child_wait;
+	int exit_status;
 
 	int fd_in, fd_out, fd_err;
 	struct io *io_err;
@@ -99,6 +102,7 @@
 	unsigned int no_mail_sync:1;
 	unsigned int local_location_from_arg:1;
 	unsigned int replicator_notify:1;
+	unsigned int exited:1;
 };
 
 static bool legacy_dsync = FALSE;
@@ -118,10 +122,6 @@
 	case -1:
 		if (ctx->io_err != NULL)
 			io_remove(&ctx->io_err);
-		if (ctx->fd_in == -1) {
-			/* we're shutting down. */
-			io_loop_stop(current_ioloop);
-		}
 		break;
 	default:
 		while ((line = i_stream_next_line(ctx->err_stream)) != NULL)
@@ -407,35 +407,33 @@
 	return doveadm_is_killed() ? -1 : 0;
 }
 
-static void cmd_dsync_wait_remote(struct dsync_cmd_context *ctx,
-				  int *status_r)
+static void cmd_dsync_remote_exited(const struct child_wait_status *status,
+				    struct dsync_cmd_context *ctx)
+{
+	ctx->exited = TRUE;
+	ctx->exit_status = status->status;
+	io_loop_stop(current_ioloop);
+}
+
+static void cmd_dsync_wait_remote(struct dsync_cmd_context *ctx)
 {
 	struct timeout *to;
 
-	/* wait for stderr to close. this indicates that the remote process
-	   has died. while we're running we're also reading and printing all
-	   errors that still coming from it. */
+	/* wait in ioloop for the remote process to die. while we're running
+	   we're also reading and printing all errors that still coming from
+	   it. */
 	to = timeout_add(DSYNC_REMOTE_CMD_EXIT_WAIT_SECS*1000,
 			 io_loop_stop, current_ioloop);
 	io_loop_run(current_ioloop);
 	timeout_remove(&to);
 
-	/* unless we timed out, the process should be dead now or very soon. */
-	alarm(1);
-	if (waitpid(ctx->remote_pid, status_r, 0) == -1) {
-		if (errno != EINTR) {
-			i_error("waitpid(%ld) failed: %m",
+	if (!ctx->exited) {
+		i_error("Remote command process isn't dying, killing it");
+		if (kill(ctx->remote_pid, SIGKILL) < 0 && errno != ESRCH) {
+			i_error("kill(%ld, SIGKILL) failed: %m",
 				(long)ctx->remote_pid);
-		} else {
-			i_error("Remote command process isn't dying, killing it");
-			if (kill(ctx->remote_pid, SIGKILL) < 0 && errno != ESRCH) {
-				i_error("kill(%ld, SIGKILL) failed: %m",
-					(long)ctx->remote_pid);
-			}
 		}
-		*status_r = -1;
 	}
-	alarm(0);
 }
 
 static void cmd_dsync_log_remote_status(int status, bool remote_errors_logged,
@@ -557,7 +555,7 @@
 	enum mail_error mail_error = 0, mail_error2;
 	bool remote_errors_logged = FALSE;
 	bool changes_during_sync = FALSE;
-	int status = 0, ret = 0;
+	int ret = 0;
 
 	memset(&set, 0, sizeof(set));
 	if (_ctx->cur_client_ip.family != 0) {
@@ -621,6 +619,7 @@
 	if (doveadm_debug)
 		brain_flags |= DSYNC_BRAIN_FLAG_DEBUG;
 
+	child_wait_init();
 	brain = dsync_brain_master_init(user, ibc, ctx->sync_type,
 					brain_flags, &set);
 
@@ -629,6 +628,8 @@
 					&changes_during_sync, &mail_error) < 0)
 			ret = -1;
 	} else {
+		ctx->child_wait = child_wait_new_with_pid(ctx->remote_pid,
+			cmd_dsync_remote_exited, ctx);
 		cmd_dsync_run_remote(user);
 	}
 
@@ -682,10 +683,11 @@
 	   shouldn't (at least with ssh) and we need stderr to be open to be
 	   able to print the final errors */
 	if (ctx->run_type == DSYNC_RUN_TYPE_CMD) {
-		cmd_dsync_wait_remote(ctx, &status);
+		cmd_dsync_wait_remote(ctx);
+		remote_error_input(ctx);
 		remote_errors_logged = ctx->err_stream->v_offset > 0;
 		i_stream_destroy(&ctx->err_stream);
-		cmd_dsync_log_remote_status(status, remote_errors_logged,
+		cmd_dsync_log_remote_status(ctx->exit_status, remote_errors_logged,
 					    ctx->remote_cmd_args);
 	} else {
 		i_assert(ctx->err_stream == NULL);
@@ -695,6 +697,9 @@
 	if (ctx->fd_err != -1)
 		i_close_fd(&ctx->fd_err);
 
+	if (ctx->child_wait != NULL)
+		child_wait_free(&ctx->child_wait);
+	child_wait_deinit();
 	return ret;
 }
 


More information about the dovecot-cvs mailing list