We are unfortunately still seeing a lot of these errors once the machine reaches a high number of concurrent users/logins (just below 20k simultaneous IMAP connections on a powerful 24 core machine with 128GB RAM):
2020-01-14T09:18:58.661349+01:00 dovecot: imap-login: Warning: net_connect_unix(imap) succeeded only after retrying - took 140330 us 2020-01-14T09:18:58.854692+01:00 dovecot: imap-login: Error: master(imap): net_connect_unix(imap) failed: Resource temporarily unavailable - http://wiki2.dovecot.org/SocketUnavailable (client-pid=107932, client-id=780262, rip=x, created 500 msecs ago, received 0/4 bytes) 2020-01-14T09:18:58.888228+01:00 dovecot: imap-login: Warning: net_connect_unix(imap) succeeded only after retrying - took 440762 us
The machine is at insignificant load numbers, and the dovecot process is somewhere near 25% CPU usage when the problem occurs. It is not close to saturating its core from what I can tell.
To make sure the issues are not task/fd-limit related, I have set this in /etc/systemd/system/dovecot.service.d/service.conf:
[Service] LimitNOFILE=infinity TasksMax=infinity
~# egrep "processes|files" /proc/pidof dovecot
/limits
Max processes 514051 514051
Max open files 1048576 1048576
~# cat /proc/sys/net/core/somaxconn
65536
~# cat /proc/sys/kernel/pid_max
278528
Dovecot is configured with NFS backed storage, and locally stored passwdfile userdb/passdb are used for authentication.
Backends are (now) behind directors. The directors/proxies are having no issues dealing with the traffic whatsoever.
At the time of failure, the process list looks like this:
~# ps -f --ppid pidof dovecot
| egrep -v "dovecot/(imap|pop3) "
UID PID PPID C STIME TTY TIME CMD
274264 44753 138506 0 12:43 ? 00:00:00 [imap]
308665 104852 138506 0 13:01 ? 00:00:00 [imap]
308665 104853 138506 0 13:01 ? 00:00:00 [imap]
dovenull 138508 138506 1 10:31 ? 00:03:00 dovecot/pop3-login [6
pre-login + 36 TLS proxies]
dovenull 138509 138506 0 10:31 ? 00:00:07 dovecot/imap-login
dovecot 138510 138506 0 10:31 ? 00:01:10 dovecot/anvil [20
connections]
root 138511 138506 1 10:31 ? 00:02:14 dovecot/log
dovenull 138512 138506 1 10:31 ? 00:01:48 dovecot/pop3-login [1
pre-login + 15 TLS proxies]
dovenull 138513 138506 0 10:31 ? 00:00:08 dovecot/imap-login
[redacted TLS proxy]
dovenull 138514 138506 0 10:31 ? 00:00:07 dovecot/imap-login [0
pre-login + 3 TLS proxies]
dovenull 138515 138506 0 10:31 ? 00:00:10 dovecot/imap-login
[redacted TLS proxy]
dovenull 138516 138506 0 10:31 ? 00:01:14 dovecot/imap-login [27
pre-login + 12 TLS proxies]
dovenull 138517 138506 0 10:31 ? 00:00:31 dovecot/imap-login [2
pre-login + 2 TLS proxies]
dovenull 138518 138506 0 10:31 ? 00:01:28 dovecot/imap-login [56
pre-login + 20 TLS proxies]
dovenull 138519 138506 0 10:31 ? 00:00:09 dovecot/imap-login [0
pre-login + 4 TLS proxies]
dovenull 138520 138506 0 10:31 ? 00:00:06 dovecot/imap-login
[redacted TLS proxy]
dovenull 138521 138506 0 10:31 ? 00:00:11 dovecot/imap-login [0
pre-login + 3 TLS proxies]
dovenull 138522 138506 0 10:31 ? 00:00:16 dovecot/imap-login [2
pre-login + 2 TLS proxies]
dovenull 138523 138506 0 10:31 ? 00:00:13 dovecot/imap-login [1
pre-login + 2 TLS proxies]
dovenull 138524 138506 0 10:31 ? 00:00:24 dovecot/imap-login [1
pre-login + 3 TLS proxies]
dovenull 138525 138506 0 10:31 ? 00:01:13 dovecot/imap-login [36
pre-login + 23 TLS proxies]
dovenull 138526 138506 0 10:31 ? 00:00:41 dovecot/imap-login [10
pre-login + 12 TLS proxies]
dovenull 138527 138506 0 10:31 ? 00:00:20 dovecot/imap-login [1
pre-login + 7 TLS proxies]
root 138528 138506 2 10:31 ? 00:04:45 dovecot/config
dovecot 138529 138506 1 10:31 ? 00:02:22 dovecot/stats [19389
connections]
dovecot 138530 138506 3 10:31 ? 00:05:53 dovecot/auth [137
wait, 0 passdb, 0 userdb]
root 148675 138506 0 13:13 ? 00:00:00 dovecot/doveadm-server
[redacted]
Other stats:
~# ps -f --ppid pidof dovecot
| grep "dovecot/imap " | wc -l
19328
~# doveadm process status | grep "^imap " | wc -l
19307
~# ss -ntp "( sport = :143 or sport = :993 )" | grep "\"imap\"" | wc -l
19141
~# ss -ntp "( sport = :143 or sport = :993 )" | grep "\"imap-login\"" |
wc -l
333
~# ss -ntp "( sport = :143 or sport = :993 )" | wc -l
19530
~# doveconf -n
# 2.3.9.2 (cf2918cac): /etc/dovecot/dovecot.conf # OS: Linux 4.9.0-9-amd64 x86_64 Debian 9.11 # Hostname: [redacted] default_vsz_limit = 768 M disable_plaintext_auth = no imap_id_log = * log_timestamp = "%F %T %z " login_trusted_networks = [redacted] mail_fsync = always mail_location = maildir:~/Maildir mail_nfs_index = yes mail_nfs_storage = yes mmap_disable = yes namespace inbox { inbox = yes location = mailbox Drafts { auto = subscribe special_use = \Drafts } mailbox Sent { auto = subscribe special_use = \Sent } mailbox "Sent Messages" { auto = no special_use = \Sent } mailbox Spam { auto = create special_use = \Junk } mailbox Trash { auto = subscribe special_use = \Trash } prefix = separator = / } passdb { args = username_format=%Lu /etc/dovecot/aliases default_fields = noauthenticate driver = passwd-file } passdb { args = /etc/dovecot/passwd driver = passwd-file } protocols = imap pop3 service doveadm { inet_listener { port = 24245 } inet_listener http { port = 8080 } } service imap-login { client_limit = 4096 inet_listener imap { port = 143 } inet_listener imaps { port = 993 ssl = yes } process_limit = 16 process_min_avail = 16 service_count = 0 vsz_limit = 768 M } service imap { client_limit = 1 process_limit = 65536 } service pop3-login { client_limit = 4096 inet_listener pop3 { port = 110 } inet_listener pop3s { port = 995 ssl = yes } process_limit = 8 process_min_avail = 2 service_count = 0 } service pop3 { process_limit = 16384 } service stats { client_limit = 65536 } ssl_cert = <[redacted] ssl_cipher_list = ALL:!LOW:!SSLv2:!EXP:!aNULL ssl_dh = # hidden, use -P to show it ssl_key = # hidden, use -P to show it userdb { args = /etc/dovecot/passwd driver = passwd-file } verbose_proctitle = yes protocol imap { mail_max_userip_connections = 20 rawlog_dir = [redacted] }
Are there any other tunables either in Dovecot or in the kernel that may relate to this issue that we may have missed?
-- Eirik
On 06/01/2020 11:28, Eirik Rye wrote:
Hi,
After upgrading Dovecot from version 2.2.27 to 2.3.9.2, we are sporadically seeing lots of these errors in the error log on many of our servers:
imap-login: Error: master(imap): net_connect_unix(imap) failed: Resource temporarily unavailable - http://wiki2.dovecot.org/SocketUnavailable
The issue is causing significant delays and/or timeouts on login.
From what I can tell, this happens because the imap-login service is unable to connect to the imap service socket, however I am unable to identify the root cause of the issue.
We have increased the client_limit for the stats service, as described in the upgrade document at (https://wiki2.dovecot.org/Upgrading/2.3). It is set to the same value as the process_limit for imap service.
System CPU usage does not appear to be saturated. The CPU usage sits at around 50% when these errors appear.
We have doubled the number of file descriptors after upgrading from 2.2.27. It was previously set to 16392 (which worked fine):
~# cat /proc/
pidof dovecot
/limits | grep "open files" Max open files 30000 30000 filesWe have tried increasing default_process_limit and default_client_limit from the default 1000 to 3000, but this has no effect.
Current configuration (with irrelevant parts removed):
~# doveconf -n
default_client_limit = 3000 # these were raised after upgrading in attempt to remedy default_process_limit = 3000 default_vsz_limit = 512 M [...] service imap-login { client_limit = 1250 inet_listener imap { port = 143 } inet_listener imaps { port = 993 ssl = yes } process_limit = 16 process_min_avail = 16 service_count = 0 vsz_limit = 512 M } service imap { client_limit = 1 process_limit = 20000 } service stats { client_limit = 20000 } [...]
We appear to be nowhere near the 20000 process_limit set for the imap service. We are also not seeing any warnings being logged with regards to the process_limit.
~# doveadm who -1 | wc -l 8765
~# doveadm process status | grep "^imap " | wc -l 7583
~# ps aux | grep "dovecot/imap" | wc -l 7449
~# doveadm process status | grep -v "^imap " name pid available_count total_count idle_start last_status_update last_kill_sent stats 19875 12384 582387 0 1578305489 0 pop3-login 19868 1000 0 0 1578061504 0 pop3-login 19867 1000 0 0 1578061503 0 pop3-login 19866 1000 0 0 1578061502 0 pop3-login 19865 1000 0 0 1578061503 0 pop3-login 19864 1000 0 0 1578061503 0 pop3-login 19853 1000 0 0 1578061504 0 pop3-login 19852 1000 0 0 1578061504 0 pop3-login 19851 1000 0 0 1578061504 0 pop3-login 19849 1000 0 0 1578061503 0 pop3-login 19847 1000 0 0 1578061503 0 pop3-login 19846 1000 0 0 1578061503 0 pop3-login 19845 1000 0 0 1578061503 0 pop3-login 19844 1000 0 0 1578061503 0 pop3-login 19843 1000 0 0 1578061503 0 pop3-login 19842 1000 0 0 1578061503 0 pop3-login 19839 1000 0 0 1578061502 0 log 19841 2973 27 0 1578061502 0 imap-login 19873 666 14942 0 1578305488 0 imap-login 19872 523 32792 0 1578305489 0 imap-login 19871 316 62876 0 1578305489 0 imap-login 19870 167 78332 0 1578305489 0 imap-login 19869 118 97989 0 1578305489 0 imap-login 19863 0 184726 0 1578305489 0 imap-login 19862 0 193094 0 1578305489 0 imap-login 19861 0 186800 0 1578305487 0 imap-login 19860 675 17806 0 1578305487 0 imap-login 19859 0 169446 0 1578305489 0 imap-login 19858 0 143517 0 1578305489 0 imap-login 19857 0 119215 0 1578305488 0 imap-login 19856 0 151958 0 1578305489 0 imap-login 19855 483 47036 0 1578305488 0 imap-login 19854 1 185240 0 1578305489 0 imap-login 19840 567 23859 0 1578305485 0 config 19874 2967 124482 0 1578305489 0 auth 19885 2967 124185 0 1578305489 0 anvil 10017 967 7567 0 1578305463 0