[2.3.8] possible replication issue
Carsten Rosenberg
cr at ncxs.de
Fri Oct 18 14:52:37 EEST 2019
Hi,
some of our customers have discovered a replication issue after
upgrading from 2.3.7.2 to 2.3.8.
When running 2.3.8, several replication connections hang until the defined
timeout. So after a few seconds there are $replication_max_conns hanging
connections.
Other replications are running fast and successful.
Also running a doveadm sync tcp:... is working fine for all users.
I can't tell for certain, but I haven't seen the same mailboxes timing out
again and again. So I would assume it's not related to the mailbox.
From the logs:
server1:
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(username1 at domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
dsync(172.16.0.1): I/O has stalled, no activity for 600 seconds (version
not received)
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(username1 at domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
Timeout during state=master_recv_handshake
server2:
Oct 16 08:29:25 server2 dovecot[8113]: doveadm: Error: read(server1)
failed: EOF (last sent=handshake, last recv=handshake)
There aren't any additional logs regarding the replication.
I have tried increasing vsz_limit or reducing replication_max_conns.
Nothing changed.
--
Both customers have 10k+ users. Currently I couldn't reproduce this on
smaller test systems.
Both installations were downgraded to 2.3.7.2 to fix the issue for now.
--
I've attached a tcpdump showing that the client stops
sending any data after the mailbox_guid table headers.
Any idea what could be wrong here or how to debug this issue?
Thanks.
Carsten Rosenberg
-------------- next part --------------
root at server1:~# doveconf -n
# 2.3.7.2 (3c910f64b): /etc/dovecot/dovecot.conf
# Pigeonhole version 0.5.7.2 (7372921a)
# OS: Linux 4.15.0-65-generic x86_64 Ubuntu 18.04.3 LTS
# Hostname: server1
auth_cache_negative_ttl = 0
auth_cache_size = 10 M
auth_master_user_separator = *
auth_worker_max_count = 1024
base_dir = /var/run/dovecot/
default_client_limit = 10000
default_vsz_limit = 1 G
doveadm_password = # hidden, use -P to show it
doveadm_port = 12345
first_valid_gid = 10000
first_valid_uid = 10000
imap_max_line_length = 640 k
last_valid_gid = 10000
last_valid_uid = 10000
mail_gid = 10000
mail_location = mdbox:%h/mdbox
mail_plugins = " mail_log notify zlib notify replication"
mail_privileged_group = mail
mail_uid = 10000
managesieve_notify_capability = mailto
managesieve_sieve_capability = fileinto reject envelope encoded-character vacation subaddress comparator-i;ascii-numeric relational regex imap4flags copy include variables body enotify environment mailbox date index ihave duplicate mime foreverypart extracttext
namespace inbox {
hidden = no
inbox = yes
list = yes
location =
prefix =
separator = /
subscriptions = yes
type = private
}
passdb {
args = /etc/dovecot.deny
deny = yes
driver = passwd-file
}
passdb {
args = /etc/dovecot/private/passwd.masterusers
driver = passwd-file
master = yes
}
passdb {
args = /etc/dovecot/dovecot-ldap-passdb.conf.ext
driver = ldap
}
plugin {
mail_replica = tcp:server2
sieve = file:~/sieve;active=~/.dovecot.sieve
sieve_default = /var/lib/dovecot/default.sieve
sieve_max_actions = 55
sieve_max_redirects = 50
}
pop3_uidl_format = %08Xv%08Xu
protocols = imap pop3 lmtp sieve
replication_dsync_parameters = -d -n INBOX -l 30 -U
replication_max_conns = 20
service aggregator {
fifo_listener replication-notify-fifo {
user = vmail
}
unix_listener replication-notify {
user = vmail
}
}
service auth-worker {
user = $default_internal_user
}
service auth {
client_limit = 10000
}
service config {
process_min_avail = 8
}
service doveadm {
inet_listener {
port = 12345
}
vsz_limit = 1 G
}
service imap-login {
process_min_avail = 64
service_count = 0
}
service imap {
process_limit = 8192
}
service lmtp {
inet_listener lmtp {
port = 24
}
}
service managesieve-login {
inet_listener sieve {
port = 4190
}
process_min_avail = 8
service_count = 0
}
service pop3-login {
process_min_avail = 8
service_count = 0
}
service replicator {
process_min_avail = 1
unix_listener replicator-doveadm {
mode = 0600
user = vmail
}
}
service submission-login {
service_count = 0
}
ssl = required
ssl_ca = </etc/ssl/certs/chain.pem
ssl_cert = </etc/ssl/certs/cert.pem
ssl_client_ca_dir = /etc/ssl/certs
ssl_dh = # hidden, use -P to show it
ssl_key = # hidden, use -P to show it
ssl_require_crl = no
userdb {
args = /etc/dovecot/dovecot-ldap-userdb.conf.ext
driver = ldap
name = userdb_ldap
}
protocol imap {
mail_max_userip_connections = 25
mail_plugins = " mail_log notify zlib notify replication imap_zlib"
}
protocol lmtp {
mail_plugins = " mail_log notify zlib notify replication sieve"
}
-------------- next part --------------
VERSION doveadm-server 1 1
VERSION doveadm-client 1 1
-
PLAIN xxxx...
+
username1 dsync-server -uusername1 -U
.....
+
VERSION dsync 3 5
Hhostname sync_ns_prefix sync_box sync_box_guid sync_type debug
sync_visible_namespaces exclude_mailboxes send_mail_requests
backup_send backup_recv lock_timeout no_mail_sync no_mailbox_renames
no_backup_overwrite purge_remote no_notify sync_since_timestamp
sync_max_size sync_flags sync_until_timestamp virtual_all_box
empty_hdr_workaround import_commit_msgs_interval hashed_headers
Smailbox_guid last_uidvalidity last_common_uid last_common_modseq
last_common_pvt_modseq last_messages_count changes_during_sync
Nname existence mailbox_guid uid_validity uid_next
last_renamed_or_created subscribed last_subscription_change
Dhierarchy_sep mailboxes dirs unsubscribes
Bmailbox_guid uid_validity uid_next messages_count first_recent_uid
highest_modseq highest_pvt_modseq mailbox_lost mailbox_ignore
cache_fields have_guids have_save_guids have_only_guid128
Atype key value stream deleted last_change modseq
Ctype uid guid hdr_hash modseq pvt_modseq add_flags remove_flags
final_flags keywords_reset keyword_changes received_timestamp virtual_size
Rguid uid
Mguid uid pop3_uidl pop3_order received_date saved_date stream
Ferror mail_error require_full_resync
cname decision last_used
.
....JHserver2 . . . . . . . . . . . . . . . . . . . . . .
.
VERSION dsync 3 5
Hhostname sync_ns_prefix sync_box sync_box_guid sync_type debug
sync_visible_namespaces exclude_mailboxes send_mail_requests
backup_send backup_recv lock_timeout no_mail_sync no_mailbox_renames
no_backup_overwrite purge_remote no_notify sync_since_timestamp
sync_max_size sync_flags sync_until_timestamp virtual_all_box
empty_hdr_workaround import_commit_msgs_interval hashed_headers
Smailbox_guid last_uidvalidity last_common_uid last_common_modseq
last_common_pvt_modseq last_messages_count changes_during_sync
Nname existence mailbox_guid uid_validity uid_next
last_renamed_or_created subscribed last_subscription_change
Dhierarchy_sep mailboxes dirs unsubscribes
Bmailbox_guid uid_validity uid_next messages_count first_recent_uid
highest_modseq highest_pvt_modseq mailbox_lost mailbox_ignore
cache_fields have_guids have_save_guids have_only_guid128
Atype key value stream deleted last_change modseq
Ctype uid guid hdr_hash modseq pvt_modseq add_flags remove_flags
final_flags keywords_reset keyword_changes received_timestamp virtual_size
Rguid uid
Mguid uid pop3_uidl pop3_order received_date saved_date stream
Ferror mail_error require_full_resync
cname decision last_used
.
Hserver1 . . s . . . . . 20 . . . . . . . . . . . 100
Date.tMessage-ID.t
L...Z.read(server1) failed: EOF (last
sent=handshake, last recv=handshake)
More information about the dovecot
mailing list