[2.3.8] possible replication issue

Carsten Rosenberg cr at ncxs.de
Fri Oct 18 14:52:37 EEST 2019


Hi,

some of our customers have discovered a replication issue after
upgrading from 2.3.7.2 to 2.3.8.

When running 2.3.8, several replication connections hang until the defined
timeout. So after a few seconds there are $replication_max_conns hanging
connections.
Other replications run fast and successfully.

Also, running a doveadm sync tcp:... works fine for all users.

I can't tell for certain, but I haven't seen the same mailboxes timing out
again and again. So I would assume it's not related to the mailbox.

From the logs:

server1:
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(username1 at domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
dsync(172.16.0.1): I/O has stalled, no activity for 600 seconds (version
not received)
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(username1 at domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
Timeout during state=master_recv_handshake

server2:

Oct 16 08:29:25 server2 dovecot[8113]: doveadm: Error: read(server1)
failed: EOF (last sent=handshake, last recv=handshake)

There aren't any additional logs regarding the replication.

I have tried increasing vsz_limit or reducing replication_max_conns.
Nothing changed.

--

Both customers have 10k+ users. So far I haven't been able to reproduce
this on smaller test systems.

Both installations were downgraded to 2.3.7.2 to fix the issue for now.

--

I've attached a tcpdump showing that the client stops
sending any data after the mailbox_guid table headers.



Any idea what could be wrong here or how to debug this issue?

Thanks.

Carsten Rosenberg
-------------- next part --------------
root at server1:~# doveconf -n
# 2.3.7.2 (3c910f64b): /etc/dovecot/dovecot.conf
# Pigeonhole version 0.5.7.2 (7372921a)
# OS: Linux 4.15.0-65-generic x86_64 Ubuntu 18.04.3 LTS
# Hostname: server1
auth_cache_negative_ttl = 0
auth_cache_size = 10 M
auth_master_user_separator = *
auth_worker_max_count = 1024
base_dir = /var/run/dovecot/
default_client_limit = 10000
default_vsz_limit = 1 G
doveadm_password = # hidden, use -P to show it
doveadm_port = 12345
first_valid_gid = 10000
first_valid_uid = 10000
imap_max_line_length = 640 k
last_valid_gid = 10000
last_valid_uid = 10000
mail_gid = 10000
mail_location = mdbox:%h/mdbox
mail_plugins = " mail_log notify zlib notify replication"
mail_privileged_group = mail
mail_uid = 10000
managesieve_notify_capability = mailto
managesieve_sieve_capability = fileinto reject envelope encoded-character vacation subaddress comparator-i;ascii-numeric relational regex imap4flags copy include variables body enotify environment mailbox date index ihave duplicate mime foreverypart extracttext
namespace inbox {
  hidden = no
  inbox = yes
  list = yes
  location =
  prefix =
  separator = /
  subscriptions = yes
  type = private
}
passdb {
  args = /etc/dovecot.deny
  deny = yes
  driver = passwd-file
}
passdb {
  args = /etc/dovecot/private/passwd.masterusers
  driver = passwd-file
  master = yes
}
passdb {
  args = /etc/dovecot/dovecot-ldap-passdb.conf.ext
  driver = ldap
}
plugin {
  mail_replica = tcp:server2
  sieve = file:~/sieve;active=~/.dovecot.sieve
  sieve_default = /var/lib/dovecot/default.sieve
  sieve_max_actions = 55
  sieve_max_redirects = 50
}
pop3_uidl_format = %08Xv%08Xu
protocols = imap pop3 lmtp sieve
replication_dsync_parameters = -d -n INBOX -l 30 -U
replication_max_conns = 20
service aggregator {
  fifo_listener replication-notify-fifo {
    user = vmail
  }
  unix_listener replication-notify {
    user = vmail
  }
}
service auth-worker {
  user = $default_internal_user
}
service auth {
  client_limit = 10000
}
service config {
  process_min_avail = 8
}
service doveadm {
  inet_listener {
    port = 12345
  }
  vsz_limit = 1 G
}
service imap-login {
  process_min_avail = 64
  service_count = 0
}
service imap {
  process_limit = 8192
}
service lmtp {
  inet_listener lmtp {
    port = 24
  }
}
service managesieve-login {
  inet_listener sieve {
    port = 4190
  }
  process_min_avail = 8
  service_count = 0
}
service pop3-login {
  process_min_avail = 8
  service_count = 0
}
service replicator {
  process_min_avail = 1
  unix_listener replicator-doveadm {
    mode = 0600
    user = vmail
  }
}
service submission-login {
  service_count = 0
}
ssl = required
ssl_ca = </etc/ssl/certs/chain.pem
ssl_cert = </etc/ssl/certs/cert.pem
ssl_client_ca_dir = /etc/ssl/certs
ssl_dh = # hidden, use -P to show it
ssl_key = # hidden, use -P to show it
ssl_require_crl = no
userdb {
  args = /etc/dovecot/dovecot-ldap-userdb.conf.ext
  driver = ldap
  name = userdb_ldap
}
protocol imap {
  mail_max_userip_connections = 25
  mail_plugins = " mail_log notify zlib notify replication imap_zlib"
}
protocol lmtp {
  mail_plugins = " mail_log notify zlib notify replication sieve"
}
-------------- next part --------------
VERSION	doveadm-server	1	1
VERSION	doveadm-client	1	1
-
PLAIN	xxxx...
+
	username1	dsync-server	-uusername1	-U
.....
+
VERSION	dsync	3	5
Hhostname	sync_ns_prefix	sync_box	sync_box_guid	sync_type	debug
sync_visible_namespaces	exclude_mailboxes	send_mail_requests
backup_send	backup_recv	lock_timeout	no_mail_sync	no_mailbox_renames
no_backup_overwrite	purge_remote	no_notify	sync_since_timestamp
sync_max_size	sync_flags	sync_until_timestamp	virtual_all_box
empty_hdr_workaround	import_commit_msgs_interval	hashed_headers
Smailbox_guid	last_uidvalidity	last_common_uid	last_common_modseq
last_common_pvt_modseq	last_messages_count	changes_during_sync
Nname	existence	mailbox_guid	uid_validity	uid_next
last_renamed_or_created	subscribed	last_subscription_change
Dhierarchy_sep	mailboxes	dirs	unsubscribes
Bmailbox_guid	uid_validity	uid_next	messages_count	first_recent_uid
highest_modseq	highest_pvt_modseq	mailbox_lost	mailbox_ignore
cache_fields	have_guids	have_save_guids	have_only_guid128
Atype	key	value	stream	deleted	last_change	modseq
Ctype	uid	guid	hdr_hash	modseq	pvt_modseq	add_flags	remove_flags
final_flags	keywords_reset	keyword_changes	received_timestamp	virtual_size
Rguid	uid
Mguid	uid	pop3_uidl	pop3_order	received_date	saved_date	stream
Ferror	mail_error	require_full_resync
cname	decision	last_used
.
....JHserver2	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
.
VERSION	dsync	3	5
Hhostname	sync_ns_prefix	sync_box	sync_box_guid	sync_type	debug
sync_visible_namespaces	exclude_mailboxes	send_mail_requests
backup_send	backup_recv	lock_timeout	no_mail_sync	no_mailbox_renames
no_backup_overwrite	purge_remote	no_notify	sync_since_timestamp
sync_max_size	sync_flags	sync_until_timestamp	virtual_all_box
empty_hdr_workaround	import_commit_msgs_interval	hashed_headers
Smailbox_guid	last_uidvalidity	last_common_uid	last_common_modseq
last_common_pvt_modseq	last_messages_count	changes_during_sync
Nname	existence	mailbox_guid	uid_validity	uid_next
last_renamed_or_created	subscribed	last_subscription_change
Dhierarchy_sep	mailboxes	dirs	unsubscribes
Bmailbox_guid	uid_validity	uid_next	messages_count	first_recent_uid
highest_modseq	highest_pvt_modseq	mailbox_lost	mailbox_ignore
cache_fields	have_guids	have_save_guids	have_only_guid128
Atype	key	value	stream	deleted	last_change	modseq
Ctype	uid	guid	hdr_hash	modseq	pvt_modseq	add_flags	remove_flags
final_flags	keywords_reset	keyword_changes	received_timestamp	virtual_size
Rguid	uid
Mguid	uid	pop3_uidl	pop3_order	received_date	saved_date	stream
Ferror	mail_error	require_full_resync
cname	decision	last_used
.
Hserver1		.	.	s	.	.	.		.	.	20	.	.	.	.	.	.	.	.	.	.	.	100
Date.tMessage-ID.t
L...Z.read(server1) failed: EOF (last
sent=handshake, last recv=handshake)


More information about the dovecot mailing list