Hi!
I've now seen this assertion failure from lmtp (most frequent), pop3 (next frequent), and imap (rarely). It didn't start happening until the number of accounts on the server was increased into the 10k-25k range. It happens when index->file_lock is a read lock and mail_index_lock is called asking for a write lock. Dovecot 2.x dies at the assertion on line 62 of mail-index-lock.c If that assertion were removed, it would likely die at the assertion on line 80.
Looking at the mail-index-lock.c from dovecot 1.2.15, it looks like the 1.2 series would enter the else case of the "kind of kludgy" block, and end up calling file_lock_try_update(), which seems like what needs to happen. Dovecot 2.x only does a file_lock_try_update() when unlocking, to demote a write lock to a read lock (according to grep).
# 2.0.5 (6ea01abfaea9): /etc/dovecot/dovecot.conf # OS: Linux 2.6.35.6 x86_64 Debian 5.0.6
Oct 9 23:02:59 stor29 dovecot: lmtp(4793, user@example.com): Panic: file mail-index-lock.c: line 62 (mail_index_lock): assertion failed: (index->lock_type == F_UNLCK) Oct 9 23:02:59 stor29 dovecot: lmtp(4793, user@example.com): Error: Raw backtrace: /usr/lib/dovecot/libdovecot.so.0 [0x7f03e16ff842] -> /usr/lib/dovecot/libdovecot.so.0 [0x7f03e16ff8aa] -> /usr/lib/dovecot/libdovecot.so.0(i_error+0) [0x7f03e16ffc53] -> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1bce244] -> /usr/lib/dovecot/libdovecot-storage.so.0(mail_index_try_lock_exclusive+0x20) [0x7f03e1bce2d0] -> /usr/lib/dovecot/libdovecot-storage.so.0(mail_index_write+0x31d) [0x7f03e1be287d] -> /usr/lib/dovecot/libdovecot-storage.so.0(mail_index_sync_commit+0x166) [0x7f03e1bda3d6] -> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1ba1d16] -> /usr/lib/dovecot/libdovecot-storage.so.0(maildir_transaction_save_commit_pre+0x4d2) [0x7f03e1b9d0b2] -> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1b98213] -> /usr/lib/dovecot/libdovecot-storage.so.0(mail_index_transaction_commit_full+0x97) [0x7f03e1bd2017] -> /usr/lib/dovecot/libdovecot-storage.so.0(index_transaction_commit+0x8b) [0x7f03e1b97e9b] -> /usr/lib/dovecot/modules/lib10_quota_plugin.so [0x7f03e0b44bd1] -> /usr/lib/dovecot/libdovecot-storage.so.0(mailbox_transaction_commit_get_changes+0x3d) [0x7f03e1b7722d] -> /usr/lib/dovecot/libdovecot-lda.so.0(mail_deliver_save+0x176) [0x7f03e1942e96] -> /usr/lib/dovecot/libdovecot-lda.so.0(mail_deliver+0xe3) [0x7f03e1943193] -> dovecot/lmtp [0x40569e] -> /usr/lib/dovecot/libdovecot.so.0(io_loop_handler_run+0xcb) [0x7f03e170a93b] -> /usr/lib/dovecot/libdovecot.so.0(io_loop_run+0x18) [0x7f03e1709a58] -> /usr/lib/dovecot/libdovecot.so.0(master_service_run+0x13) [0x7f03e16f8f93] -> dovecot/lmtp(main+0x159) [0x404419] -> /lib/libc.so.6(__libc_start_main+0xe6) [0x7f03e0f831a6] -> dovecot/lmtp [0x4041f9] Oct 9 23:02:59 stor29 dovecot: master: Error: service(lmtp): child 4793 killed with signal 6 (core not dumped)
Here's a backtrace from a different lmtp process that I attached gdb to, since lmtp didn't want to dump core: #0 0x00007fe8054c7ed5 in raise () from /lib/libc.so.6 No symbol table info available. #1 0x00007fe8054c93f3 in abort () from /lib/libc.so.6 No symbol table info available. #2 0x00007fe805c30855 in default_fatal_finish (type=<value optimized out>, status=0) at failures.c:178 backtrace = 0x1a50bd0 "/usr/lib/dovecot/libdovecot.so.0 [0x7fe805c30842] -> /usr/lib/dovecot/libdovecot.so.0 [0x7fe805c308aa] -> /usr/lib/dovecot/libdovecot.so.0(i_error+0) [0x7fe805c30c53] -> /usr/lib/dovecot/libdovecot-st"... #3 0x00007fe805c308aa in i_internal_fatal_handler (type=LOG_TYPE_PANIC, status=0, fmt=<value optimized out>, args=<value optimized out>) at failures.c:607 No locals. #4 0x00007fe805c30c53 in i_panic (format=<value optimized out>) at failures.c:244 args = {{gp_offset = 40, fp_offset = 48, overflow_arg_area = 0x7fff71f8cc40, reg_save_area = 0x7fff71f8cb80}} #5 0x00007fe8060ff244 in mail_index_lock (index=0x1cf0f80, lock_type=1, timeout_secs=<value optimized out>, lock_id_r=0x7fff71f8cdac) at mail-index-lock.c:62 ret = <value optimized out> __FUNCTION__ = "mail_index_lock" #6 0x00007fe8060ff2d0 in mail_index_try_lock_exclusive (index=0x4c10, lock_id_r=<value optimized out>) at mail-index-lock.c:146 ret = <value optimized out> #7 0x00007fe80611387d in mail_index_write (index=0x1cf0f80, want_rotate=false) at mail-index-write.c:226 map = (struct mail_index_map *) 0x1d0baf0 st = {st_dev = 37632, st_ino = 12898927743, st_nlink = 1, st_mode = 33152, st_uid = 2500, st_gid = 2500, pad0 = 0, st_rdev = 0, st_size = 260200, st_blksize = 4096, st_blocks = 512, st_atim = { tv_sec = 1286434371, tv_nsec = 554727597}, st_mtim = {tv_sec = 1286611467, tv_nsec = 18456193}, st_ctim = {tv_sec = 1286611467, tv_nsec = 18456193}, __unused = {0, 0, 0}} lock_id = 0 ret = <value optimized out> __FUNCTION__ = "mail_index_write" #8 0x00007fe80610b3d6 in mail_index_sync_commit (_ctx=0x1cfd908) at mail-index-sync.c:837 ctx = (struct mail_index_sync_ctx *) 0x1cd4590 index = (struct mail_index *) 0x1cf0f80 next_uid = 0 want_rotate = 16 index_undeleted = false delete_index = false ret = 0 #9 0x00007fe8060d2d16 in maildir_sync_index_finish (ctx=0x1cfd8f0, success=<value optimized out>) at maildir-sync-index.c:332 mbox = (struct maildir_mailbox *) 0x1aa2af0 time_diff = <value optimized out> ret = 0 #10 0x00007fe8060ce0b2 in maildir_transaction_save_commit_pre (_ctx=0x1d47a40) at maildir-save.c:984 _t = (struct mailbox_transaction_context *) 0x1d0bd80 sync_flags = <value optimized out> ret = <value optimized out> __FUNCTION__ = "maildir_transaction_save_commit_pre" #11 0x00007fe8060c9213 in index_transaction_index_commit ( index_trans=0x1afbad0, result_r=0x7fff71f8d090) at index-transaction.c:27 it = (struct index_transaction_context *) 0x1d0bd80 ibox = (struct index_mailbox_context *) 0x1aa2eb8 ret = <value optimized out> __FUNCTION__ = "index_transaction_index_commit" #12 0x00007fe806103017 in mail_index_transaction_commit_full ( _t=0x7fff71f8d0b8, result_r=0x7fff71f8d090) at mail-index-transaction.c:234 t = (struct mail_index_transaction *) 0x1afbad0 index = (struct mail_index *) 0x1cf0f80 index_undeleted = false #13 0x00007fe8060c8e9b in index_transaction_commit (t=0x1d0bd80, changes_r=0x7fff71f8d160) at index-transaction.c:123 box = (struct mailbox *) 0x1aa2af0 itrans = (struct mail_index_transaction *) 0x0 result = {log_file_seq = 0, log_file_offset = 0, commit_size = 0, ignored_modseq_changes = 0} ret = <value optimized out> #14 0x00007fe805075bd1 in quota_mailbox_transaction_commit (ctx=0x1d0bd80, changes_r=0x7fff71f8d160) at quota-storage.c:94 qt = (struct quota_transaction_context *) 0x1cfd610 #15 0x00007fe8060a822d in mailbox_transaction_commit_get_changes ( _t=<value optimized out>, changes_r=0x7fff71f8d160) at mail-storage.c:1215 _data_stack_cur_id = 3 t = (struct mailbox_transaction_context *) 0x1d0bd80 ret = <value optimized out> #16 0x00007fe805e73e96 in mail_deliver_save (ctx=0x7fff71f8d250, mailbox=0x407ba1 "INBOX", flags=<value optimized out>, keywords=<value optimized out>, storage_r=0x7fff71f8d2c8) at mail-deliver.c:219 open_ctx = {user = 0x1ce6980, lda_mailbox_autocreate = false, lda_mailbox_autosubscribe = false} box = (struct mailbox *) 0x1aa2af0 trans_flags = <value optimized out> t = (struct mailbox_transaction_context *) 0x0 save_ctx = (struct mail_save_context *) 0x0 kw = (struct mail_keywords *) 0x0 error = MAIL_ERROR_NONE mailbox_name = 0x407ba1 "INBOX" errstr = <value optimized out> changes = {pool = 0x1cd3fb0, uid_validity = 1233133548, saved_uids = { arr = {buffer = 0x1cd3fd0, element_size = 8}, v = 0x1cd3fd0, v_modifiable = 0x1cd3fd0}, ignored_modseq_changes = 0} range = <value optimized out> ret = 0 __FUNCTION__ = "mail_deliver_save" #17 0x00007fe805e74193 in mail_deliver (ctx=0x7fff71f8d250, storage_r=0x7fff71f8d2c8) at mail-deliver.c:289 ret = -1 #18 0x000000000040569e in client_input_data_handle (client=0x1a87520) at commands.c:476 data = <value optimized out> size = 1662 ret = <value optimized out> #19 0x00007fe805c3b93b in io_loop_handler_run (ioloop=<value optimized out>) at ioloop-epoll.c:212 ctx = (struct ioloop_handler_context *) 0x1a5f480 event = (const struct epoll_event *) 0x1a5f4f0 list = (struct io_list *) 0x1a60170 io = (struct io_file *) 0x1b5aab0 tv = {tv_sec = 299, tv_usec = 998518} t_id = 2 msecs = <value optimized out> ret = 1 call = <value optimized out> #20 0x00007fe805c3aa58 in io_loop_run (ioloop=0x1a585b0) at ioloop.c:350 No locals. #21 0x00007fe805c29f93 in master_service_run (service=0x1a58480, callback=0x4c10) at master-service.c:496 22 0x0000000000404419 in main (argc=1, argv=0x1a58370) at main.c:120 set_roots = {0x6098a0, 0x408320, 0x0} service_flags = <value optimized out> storage_service_flags = 675 c = <value optimized out>
And here's index and index->file_lock from mail_index_lock: (gdb) p *index $6 = {dir = 0x1b54380 "/w/c/3/d/808008/Maildir", prefix = 0x1d04430 "dovecot.index", cache = 0x1d0b8b0, log = 0x1c6afb0, open_count = 2, flags = MAIL_INDEX_OPEN_FLAG_CREATE, fsync_mode = FSYNC_MODE_OPTIMIZED, fsync_mask = 0, mode = 384, gid = 4294967295, gid_origin = 0x1b5e270 "/w/c/3/d/808008/Maildir", extension_pool = 0x1c32130, extensions = {arr = {buffer = 0x1c32150, element_size = 64}, v = 0x1c32150, v_modifiable = 0x1c32150}, sync_lost_handlers = {arr = {buffer = 0x1c06e60, element_size = 8}, v = 0x1c06e60, v_modifiable = 0x1c06e60}, filepath = 0x1bd71b0 "/w/c/3/d/808008/Maildir/dovecot.index", fd = 14, map = 0x1d0baf0, indexid = 1286433474, inconsistency_id = 0, last_read_log_file_seq = 3, last_read_log_file_head_offset = 32852, last_read_log_file_tail_offset = 32852, last_read_stat = {st_dev = 37632, st_ino = 12898927743, st_nlink = 1, st_mode = 33152, st_uid = 2500, st_gid = 2500, pad0 = 0, st_rdev = 0, st_size = 260200, st_blksize = 4096, st_blocks = 512, st_atim = {tv_sec = 1286434371, tv_nsec = 554727597}, st_mtim = {tv_sec = 1286611467, tv_nsec = 18456193}, st_ctim = { tv_sec = 1286611467, tv_nsec = 18456193}, __unused = {0, 0, 0}}, fsck_log_head_file_seq = 0, fsck_log_head_file_offset = 0, sync_commit_result = 0x0, lock_type = 0, shared_lock_count = 1, excl_lock_count = 0, lock_id_counter = 4, lock_method = FILE_LOCK_METHOD_FCNTL, max_lock_timeout_secs = 4294967295, file_lock = 0x1b5e290, dotlock = 0x0, keywords_pool = 0x1b5dfb0, keywords = { arr = {buffer = 0x1b5e1a0, element_size = 8}, v = 0x1b5e1a0, v_modifiable = 0x1b5e1a0}, keywords_hash = 0x1c6af60, keywords_ext_id = 0, modseq_ext_id = 1, module_contexts = {arr = {buffer = 0x1ac22b0, element_size = 8}, v = 0x1ac22b0, v_modifiable = 0x1ac22b0}, error = 0x0, nodiskspace = 0, index_lock_timeout = 0, index_delete_requested = 0, index_deleted = 0, log_sync_locked = 1, readonly = 0, mapping = 0, syncing = 1, need_recreate = 0, index_min_write = 0, modseqs_enabled = 0, initial_create = 0, initial_mapped = 1} (gdb) p *index->file_lock $7 = {fd = 14, path = 0x1d0bac0 "/w/c/3/d/808008/Maildir/dovecot.index", lock_type = 0, lock_method = FILE_LOCK_METHOD_FCNTL}
I can easily reproduce this currently if more information is needed.
-- Ryan Murray rmurray@cyberhqz.com