Hello
On the latest git version of Dovecot, I get:
Apr 24 04:07:36 gjserver dovecot[857958]: imap-login: Panic: file client-common.c: line 293 (client_disconnect): assertion failed: (client->prev == NULL && client->next == NULL)
and the login process crashes.
On 2.3.14, there are no problems.
Hope it helps
JM
chroot= does not resolve the issue
I have "chroot = login" in my conf
Here is the coredump
[root@gjserver coredump]# gdb /usr/libexec/dovecot/imap-login core.imap-login.76.bb9d94dc52924cc1a339505741fb5458.1632413.1619290159000000 GNU gdb (GDB) 10.1 Copyright (C) 2020 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "x86_64-pc-linux-gnu". Type "show configuration" for configuration details. For bug reporting instructions, please see: <https://www.gnu.org/software/gdb/bugs/>. Find the GDB manual and other documentation resources online at: <http://www.gnu.org/software/gdb/documentation/>.
For help, type "help". Type "apropos word" to search for commands related to "word"... Reading symbols from /usr/libexec/dovecot/imap-login... [New LWP 1632413] [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Core was generated by `dovecot/imap-login'. Program terminated with signal SIGABRT, Aborted. #0 0x00007f7a33003ef5 in raise () from /usr/lib/libc.so.6 (gdb) bt full #0 0x00007f7a33003ef5 in raise () from /usr/lib/libc.so.6 No symbol table info available. #1 0x00007f7a32fed862 in abort () from /usr/lib/libc.so.6 No symbol table info available. #2 0x00007f7a331e9377 in default_fatal_finish (status=0, type=LOG_TYPE_PANIC) at failures.c:459 backtrace = 0x55d70a134c40 "#0 fatal_handler_real[0x7f7a33293670] -> #1 i_internal_fatal_handler[0x7f7a332937a0] -> #2 i_panic[0x7f7a331e8f79] -> #3 client_disconnect.cold[0x7f7a33365722] -> #4 client_destroy[0x7f7a333683a0] -> "... backtrace = <optimized out> recursed = 0 #3 fatal_handler_real (ctx=<optimized out>, format=<optimized out>, args=<optimized out>) at failures.c:471 status = 0 #4 0x00007f7a332937c1 in i_internal_fatal_handler (ctx=<optimized out>, format=<optimized out>, args=<optimized out>) at failures.c:866 No locals. 
#5 0x00007f7a331e903f in i_panic (format=format@entry=0x7f7a333710c8 "file %s: line %d (%s): assertion failed: (%s)") at failures.c:523 ctx = {type = LOG_TYPE_PANIC, exit_status = 0, timestamp = 0x0, timestamp_usecs = 0, log_prefix = 0x0, log_prefix_type_pos = 0} args = {{gp_offset = 40, fp_offset = 48, overflow_arg_area = 0x7fff0dd2acf0, reg_save_area = 0x7fff0dd2ac30}} #6 0x00007f7a3336574a in client_disconnect (add_disconnected_prefix=false, reason=<optimized out>, client=0x55d70a1a98c8) at client-common.c:293 __func__ = "client_disconnect" extra_reason = <optimized out> event = <optimized out> _tmp_event = <optimized out> _tmp_event = <optimized out> #7 client_disconnect (client=0x55d70a1a98c8, reason=<optimized out>, add_disconnected_prefix=<optimized out>) at client-common.c:255 __func__ = "client_disconnect" extra_reason = <optimized out> event = <optimized out> _tmp_event = <optimized out> _tmp_event = <optimized out> #8 0x00007f7a33368436 in client_destroy (client=<optimized out>, reason=<optimized out>) at client-common.c:315 __func__ = "client_destroy" #9 0x00007f7a3336a4ce in sasl_callback (client=<optimized out>, sasl_reply=SASL_SERVER_REPLY_SUCCESS, data=0x0, args=<optimized out>) at client-common-auth.c:742 reply = {master_user = 0x0, reason = 0x0, fail_code = CLIENT_AUTH_FAIL_CODE_NONE, host = 0x0, hostip = 0x0, source_ip = 0x0, destuser = 0x0, password = 0x0, proxy_mech = 0x0, port = 0, proxy_timeout_msecs = 0, proxy_refresh_secs = 0, proxy_host_immediate_failure_after_secs = 0, ssl_flags = 0, all_fields = 0x0, proxy = false, proxy_nopipelining = false, proxy_not_trusted = false, nologin = false} __func__ = "sasl_callback" #10 0x00007f7a33218c04 in master_auth_connection_input (conn=<optimized out>) at master-auth.c:156 reply = <optimized out> ret = <optimized out> #11 0x00007f7a332a9309 in io_loop_call_io (io=0x55d70a1b87a0) at ioloop.c:715 ioloop = 0x55d70a13d2c0 t_id = 2 __func__ = "io_loop_call_io" #12 0x00007f7a332aa932 in 
io_loop_handler_run_internal (ioloop=ioloop@entry=0x55d70a13d2c0) at ioloop-epoll.c:222 ctx = 0x55d70a141380 events = <optimized out> event = 0x55d70a141400 list = 0x55d70a1b8800 io = <optimized out> tv = {tv_sec = 29, tv_usec = 998424} events_count = <optimized out> msecs = <optimized out> ret = 1 i = 0 --Type <RET> for more, q to quit, c to continue without paging-- j = 0 call = <optimized out> __func__ = "io_loop_handler_run_internal" #13 0x00007f7a332a93b0 in io_loop_handler_run (ioloop=0x55d70a13d2c0) at ioloop.c:767 __func__ = "io_loop_handler_run" #14 0x00007f7a332a9570 in io_loop_run (ioloop=0x55d70a13d2c0) at ioloop.c:740 __func__ = "io_loop_run" #15 0x00007f7a3321dcb3 in master_service_run (service=0x55d70a13d120, callback=callback@entry=0x7f7a3336e340 <client_connected>) at master-service.c:862 No locals. #16 0x00007f7a3336eb7d in login_binary_run (binary=<optimized out>, argc=<optimized out>, argv=<optimized out>) at main.c:562 service_flags = <optimized out> set_pool = 0x55d70a144de0 login_socket = 0x7f7a3337337d "login" c = <optimized out> #17 0x00007f7a32feeb25 in __libc_start_main () from /usr/lib/libc.so.6 No symbol table info available. #18 0x000055d70823a84e in _start () No symbol table info available.
On 2021-04-24 09:41, Aki Tuomi wrote:
Thanks! The chroot was needed to get the core dump. Can you try if this does fix the crash? Aki
The client needs to be removed from destroyed_clients linked list before it's added to client_fd_proxies linked list. Broken by 1c622cdbe08df2f642e28923c39894516143ae2a --- src/login-common/client-common.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/login-common/client-common.c b/src/login-common/client-common.c index bdb6e9c798..1d264d9f75 100644 --- a/src/login-common/client-common.c +++ b/src/login-common/client-common.c @@ -289,8 +289,9 @@ void client_disconnect(struct client *client, const char *reason, /* Login was successful. We may now be proxying the connection, so don't disconnect the client until client_unref(). */ if (client->iostream_fd_proxy != NULL) { + i_assert(!client->fd_proxying); client->fd_proxying = TRUE; - i_assert(client->prev == NULL && client->next == NULL); + DLLIST_REMOVE(&destroyed_clients, client); DLLIST_PREPEND(&client_fd_proxies, client); client_fd_proxies_count++; } @@ -307,8 +308,9 @@ void client_destroy(struct client *client, const char *reason) if (last_client == client) last_client = client->prev; - /* remove from clients linked list before it's added to - client_fd_proxies. */ + /* move to destroyed_clients linked list before it's potentially + added to client_fd_proxies. */ + i_assert(!client->fd_proxying); DLLIST_REMOVE(&clients, client); DLLIST_PREPEND(&destroyed_clients, client); @@ -409,13 +411,14 @@ bool client_unref(struct client **_client) DLLIST_REMOVE(&client_fd_proxies, client); i_assert(client_fd_proxies_count > 0); client_fd_proxies_count--; + } else { + DLLIST_REMOVE(&destroyed_clients, client); } i_stream_unref(&client->input); o_stream_unref(&client->output); i_close_fd(&client->fd); event_unref(&client->event); - DLLIST_REMOVE(&destroyed_clients, client); i_free(client->proxy_user); i_free(client->proxy_master_user); i_free(client->virtual_user); -- GitLab
The current autoconf code is a bit buggy, but if you do indeed have libsystemd-dev installed it should do the right thing and will work with systemd even if you have Type=notify. This has actually been tested, so if it's not working, then something else is wrong. Did you remember to run ./autogen.sh after pulling from git to make sure you get the new configure script? Aki
I don't know then. It works for me and I just tried it again. The only reason it would fail would be that HAVE_LIBSYSTEMD is not defined, so it would not be using libsystemd for notify support. $ sudo systemctl status dovecot ● dovecot.service - Dovecot IMAP/POP3 email server Loaded: loaded (/lib/systemd/system/dovecot.service; disabled; vendor preset: enabled) Active: active (running) since Mon 2021-04-26 10:30:02 EEST; 2s ago Docs: man:dovecot(1) https://doc.dovecot.org/ Main PID: 30213 (dovecot) Status: "v2.4.devel (98a1cca054) running" Tasks: 4 (limit: 4701) Memory: 3.3M CGroup: /system.slice/dovecot.service ├─30213 /home/cmouse/dovecot/sbin/dovecot -F ├─30214 dovecot/anvil ├─30215 dovecot/log └─30216 dovecot/config You can tell from the "Status" line that it's using Type=notify. Aki
I have # sudo systemctl status dovecot ● dovecot.service - Dovecot IMAP/POP3 email server Loaded: loaded (/usr/lib/systemd/system/dovecot.service; enabled; vendor preset: disabled) Active: active (running) since Sun 2021-04-25 20:13:25 UTC; 14h ago Docs: man:dovecot(1) https://doc.dovecot.org/ Main PID: 2559364 (dovecot) Tasks: 28 (limit: 76912) Memory: 1.0G CPU: 7min 18.342s CGroup: /system.slice/dovecot.service ├─2559364 /usr/sbin/dovecot -F ├─2559366 dovecot/imap-login ├─2559367 dovecot/anvil [11 connections] ├─2559368 dovecot/log On 2021-04-26 08:32, Aki Tuomi wrote:
Did you see if the problem was that the imap-login process was using 100% CPU, or was the issue something else? I can't find a bug with the patch itself. But attached is another patch that adds some more asserts to make sure the linked lists are being used correctly, so if there is some bug it should now assert-crash instead of doing something else weird like going into an infinite loop. But maybe the high CPU usage was something unrelated to this patch?
participants (3)
-
Aki Tuomi
-
Joan Moreau
-
Timo Sirainen