no full syncs after upgrading to dovecot 2.3.18

Paul Kudla (SCOM.CA Internet Services Inc.) paul at scom.ca
Tue Apr 26 09:43:09 UTC 2022


ok until someone fixes the folder count (or i figure that out)

the errors will stay on the replicator side and i use that error 
(knowing they are not replicating) coming back to trigger a background 
dsync with a python script through doveadm (seems to work)

but this is a hack.

[05:41:37] mail18.scom.ca [root:0] /programs/lib
# sync.users
nick at elirpa.com                   high     00:06:41  503:54:00 - 
     y
keith at elirpa.com                  high     00:06:20  503:52:42 - 
     y
paul at scom.ca                      high     00:06:40  503:53:50 - 
     y
ed at scom.ca                        high     00:06:42  503:54:00 - 
     y
ed.hanna at dssmgmt.com              high     00:06:41  503:53:57 - 
     y
paul at paulkudla.net                high     00:06:43  503:54:02 620:42:06 
    y

[05:41:46] mail18.scom.ca [root:0] /programs/lib
# cat /programs/common/sync.users
doveadm replicator status '*' | grep '   y'





crontab :

*/10    *                *    *    *    root            /usr/bin/nohup 
/programs/common/sync.recover > /dev/null


# cat /programs/common/sync.recover
#!/usr/local/bin/python3

#Force sync between servers that are reporting bad?

import os,sys,django,socket
from optparse import OptionParser


from lib import *

#Sample Re-Index MB
#doveadm -D force-resync -u paul at scom.ca -f INBOX*



USAGE_TEXT = '''\
usage: %%prog %s[options]
'''

parser = OptionParser(usage=USAGE_TEXT % '', version='0.4')

parser.add_option("-m", "--send_to", dest="send_to", help="Send Email To")
parser.add_option("-e", "--email", dest="email_box", help="Box to Index")
parser.add_option("-d", "--detail",action='store_true', 
dest="detail",default =False, help="Detailed report")
parser.add_option("-i", "--index",action='store_true', 
dest="index",default =False, help="Index")

options, args = parser.parse_args()

print (options.email_box)
print (options.send_to)
print (options.detail)

#sys.exit()



print ('Getting Current User Sync Status')
command = commands("/usr/local/bin/doveadm replicator status '*'")


#print command

sync_user_status = command.output.split('\n')

#print sync_user_status

synced = []

for n in range(1,len(sync_user_status)) :
         user = sync_user_status[n]
         print ('Processing User : %s' %user.split(' ')[0])
         if user.split(' ')[0] != options.email_box :
                 if options.email_box != None :
                         continue

         if options.index == True :
                 command = '/usr/local/bin/doveadm -D force-resync -u %s 
-f INBOX*' %user.split(' ')[0]
                 command = commands(command)
                 command = command.output

         #print user
         for nn in range (len(user)-1,0,-1) :
                 #print nn
                 #print user[nn]

                 if user[nn] == '-' :
                         #print 'skipping ... %s' %user.split(' ')[0]

                         break



                 if user[nn] == 'y': #Found a Bad Mailbox
                         print ('syncing ... %s' %user.split(' ')[0])


                         if options.detail == True :
                                 command = '/usr/local/bin/doveadm -D 
sync -u %s -d -N -l 30 -U' %user.split(' ')[0]
                                 print (command)
                                 command = commands(command)
                                 command = command.output.split('\n')
                                 print (command)
                                 print ('Processed Mailbox for ... %s' 
%user.split(' ')[0] )
                                 synced.append('Processed Mailbox for 
... %s' %user.split(' ')[0])
                                 for nnn in range(len(command)):
                                         synced.append(command[nnn] + '\n')
                                 break


                         if options.detail == False :
                                 #command = '/usr/local/bin/doveadm -D 
sync -u %s -d -N -l 30 -U' %user.split(' ')[0]
                                 #print (command)
                                 #command = os.system(command)
                                 command = subprocess.Popen( 
["/usr/local/bin/doveadm sync -u %s -d -N -l 30 -U" %user.split(' ')[0] ], \
                                 shell = True, stdin=None, stdout=None, 
stderr=None, close_fds=True)

                                 print ( 'Processed Mailbox for ... %s' 
%user.split(' ')[0] )
                                 synced.append('Processed Mailbox for 
... %s' %user.split(' ')[0])
                                 #sys.exit()
                                 break

if len(synced) != 0 :
         #send email showing bad synced boxes ?

         if options.send_to != None :
                 send_from = 'monitor at scom.ca'
                 send_to = ['%s' %options.send_to]
                 send_subject = 'Dovecot Bad Sync Report for : %s' 
%(socket.gethostname())
                 send_text = '\n\n'
                 for n in range (len(synced)) :
                         send_text = send_text + synced[n] + '\n'

                 send_files = []
                 sendmail (send_from, send_to, send_subject, send_text, 
send_files)



sys.exit()


Lib3 (lib.py) my general libary load

# cat lib3.py
#Python Library written by paul kudla (c) 2011

#Load the librarys for the system

import os,sys,time,socket
import string
from ftplib import FTP
from decimal import *
from datetime import date
import datetime
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.utils import COMMASPACE, formatdate
from email import encoders
import subprocess

getcontext().prec = 20


class commands:
         def __init__(self,command) :
                 self.command = command
                 #print (self.command)
                 self.output = 'Error'
                 self.status = '255'

                 #sample
                 #rc, gopath = subprocess.getstatusoutput('ls -a')

                 self.status, self.output = 
subprocess.getstatusoutput(self.command)

                 try:
                   self.cr = self.output.split('\n')
                 except :
                   self.cr = []
                 try:
                   self.count = len(self.cr)
                 except :
                   self.count = 0

                 self.status = int(self.status)

                 #return count=number of lines, cr = lines split, 
getoutput = actual output returned, status = return code

                 return

#Email with attachment
class sendmail:
         def __init__(self, send_from, send_to, send_subject, send_text, 
send_files):
         #send_from, send_to, send_subject, send_text, send_files):
                 #print ('lib.py sending email')
                 assert type(send_to)==list
                 assert type(send_files)==list

                 msg = MIMEMultipart()
                 msg['From'] = send_from
                 msg['To'] = COMMASPACE.join(send_to)
                 msg['Date'] = formatdate(localtime=True)
                 msg['Subject'] = send_subject

                 msg.attach( MIMEText(send_text) )

                 for f in send_files:
                         part = MIMEBase('application', "octet-stream")
                         part.set_payload( open(f,"rb").read() )
                         Encoders.encode_base64(part)
                         part.add_header('Content-Disposition', 
'attachment; filename="%s"' % os.path.basename(f))
                         msg.attach(part)

                 try : #Send Local?
                         smtp = smtplib.SMTP('mail.local.scom.ca')
                         #smtp.login('backup at scom.ca','522577')
                         #print ('Sending Email to : %s' %send_to)
                         smtp.sendmail(send_from, send_to, msg.as_string())
                         smtp.close()

                 except :
                         smtp = smtplib.SMTP('mail.scom.ca')
                         smtp.login('backup at scom.ca','522577')
                         #print ('Sending Email to : %s' %send_to)
                         smtp.sendmail(send_from, send_to, msg.as_string())
                         smtp.close()

class getdatetime:
         def __init__(self):
                 self.datetime = datetime.date.today()
                 self.datetime_now = datetime.datetime.now()
                 self.date = str( time.strftime("%Y-%m-%d %H:%M:%S") )
                 self.date_long = str( time.strftime("%Y-%m-%d %H:%M:%S") )
                 self.date_short = str( time.strftime("%Y-%m-%d") )
                 self.time =  str( time.strftime("%H:%M:%S") )
                 self.date_time_sec = self.datetime_now.strftime 
("%Y-%m-%d %H:%M:%S.%f")


#Return edi senddate string (short) 2011-10-31 into 111031

class create_ascii :
         def __init__(self,string_data) :
                 self.string_data = str(string_data)
                 import string
                 self.printable = set(string.printable)
                 self.list = list(filter(lambda x: x in self.printable, 
self.string_data))
                 #print (self.list)

                 self.ascii = ''
                 for n in range (0,len(self.list)) :
                         self.ascii = self.ascii + self.list[n]
                 self.ascii = str(self.ascii)

                 return


class edi_send_date_short:
         def __init__(self, senddate):
                 self.date = senddate
                 self.result = self.date[2] + self.date[3] + 
self.date[5] + self.date[6] + self.date[8] + self.date[9]

         def __str__(self):
                 return '%s' % self.result

##Return edi senddate string (long) 2011-10-31 into 20111031
class edi_send_date_long:
         def __init__(self, senddate):
                 self.date = senddate
                 self.result1 = self.date[0] + self.date[1] + 
self.date[2] + self.date[3] + self.date[5] + self.date[6] + self.date[8] 
+ self.date[9]
                 self.result2 = self.date[2] + self.date[3] + 
self.date[5] + self.date[6] + self.date[8] + self.date[9]

         def __str__(self):
                 return '%s' % (self.result1,self.result2)

class gpsdeg:
         def __init__(self, dms):
                 self.dms = dms
                 self.is_positive = self.dms >= 0
                 self.dms = abs(self.dms)
                 self.minutes,self.seconds = divmod(self.dms*3600,60)
                 self.degrees,self.minutes = divmod(self.minutes,60)
                 self.degrees = self.degrees if self.is_positive else 
-self.degrees

         def __str__(self):
                 return '%s' % (self.degrees,self.minutes,self.seconds)


class degdir:
         def __init__(self, degrees):
                 self.direction_data = ['N','348.75','11.25','NNE', 
'11.25','33.75','NE','33.75','56.25','ENE', 
'56.25','78.75','E','78.75','101.25','ESE','101.25','123.75','SE','123.75','146.25','SSE','146.25','168.75','S','168.75','191.25','SSW','191.25','213.75','SW','213.75','236.25','WSW','236.25','258.75','W','258.75','281.25','WNW','281.25','303.75','NW','303.75','326.25','NNW','326.25','348.75']


         def __str__(self):
                 return '%s' % (self.direction)


class gettime:
         def __init__(self):
                 self.uu = time.localtime()

                 self.todaystime = str(self.uu[3]) #get the hr

                 if int(self.uu[3]) < 10: #add a zero
                         self.todaystime = '0' + self.todaystime
                 if int(self.uu[4]) < 10: #add a zero in front
                         self.todaystime = self.todaystime 
+":0"+str(self.uu[4])
                 else:
                         self.todaystime = self.todaystime 
+":"+str(self.uu[4])

         def __str__(self):
                 return self.todaystime

class array2dbstring:
         def __init__(self,array):
                 self.data = array
                 for self.nn in range(0,len(self.data)):
                         print ('Data %s \t\t %s' % 
(str(self.data[self.nn]),str( type(self.data[self.nn])) ) ) #change all 
data into strings
                         self.a = type(self.data[self.nn])
                         self.a = str(self.a)
                         if 'Decimal' in self.a :
                                 self.n = str(self.data[self.nn])
                                 #self.n = self.n.lstrip("'")
                                 #self.n = self.n.rstrip("'")
                                 #self.data[self.nn] = 
float(self.data[self.nn])
                                 self.data[self.nn] = str('0.00')
                                 print (self.n)

                         if 'NoneType' in self.a :
                                 self.data[self.nn] = ''
                         if 'datetime.datetime' in self.a :
                                 #self.data[self.nn] = 
str(self.data[self.nn])
                                 #self.data[self.nn].replace
                                 self.data[self.nn] = '2012-01-25 00:00:00'
                 self.data = str(self.data)
                 self.data = self.data.lstrip('[')
                 self.data = self.data.rstrip(']')
                 self.data = self.data.replace("'NULL'","NULL")
                 #self.data = self.data.replace(" '',", ",")
                 #self.data = self.data.replace(" '0.00'","'100'")

         def __str__(self):
                 return self.data

class get_hostname:
         def __init__(self):
                 self.hostname = socket.gethostname()


Happy Tuesday !!!
Thanks - paul

Paul Kudla


Scom.ca Internet Services <http://www.scom.ca>
004-1009 Byron Street South
Whitby, Ontario - Canada
L1N 4S3

Toronto 416.642.7266
Main 1.866.411.7266
Fax 1.888.892.7266

On 4/25/2022 7:54 AM, Sebastian Marske wrote:
> 
> Hi there,
> 
> thanks for your insights and for diving deeper into this Paul!
> 
> For me, the users ending up in 'Waiting for dsync to finish' all have
> more than 256 Imap folders as well (ranging from 288 up to >5500; as per
> 'doveadm mailbox list -u <username> | wc -l'). For more details on my
> setup please see my post from February [1].
> 
> @Arnaud: What OS are you running on?
> 
> 
> Best
> Sebastian
> 
> 
> [1] https://dovecot.org/pipermail/dovecot/2022-February/124168.html
> 
> 
> On 4/24/22 19:36, Paul Kudla (SCOM.CA Internet Services Inc.) wrote:
>>
>> Question having similiar replication issues
>>
>> pls read everything below and advise the folder counts on the
>> non-replicated users?
>>
>> i find  the total number of folders / account seems to be a factor and
>> NOT the size of the mail box
>>
>> ie i have customers with 40G of emails no problem over 40 or so folders
>> and it works ok
>>
>> 300+ folders seems to be the issue
>>
>> i have been going through the replication code
>>
>> no errors being logged
>>
>> i am assuming that the replication --> dhclient --> other server is
>> timing out or not reading the folder lists correctly (ie dies after X
>> folders read)
>>
>> thus i am going through the code patching for log entries etc to find
>> the issues.
>>
>> see
>>
>> [13:33:57] mail18.scom.ca [root:0] /usr/local/var/lib/dovecot
>> # ll
>> total 86
>> drwxr-xr-x  2 root  wheel  uarch    4B Apr 24 11:11 .
>> drwxr-xr-x  4 root  wheel  uarch    4B Mar  8  2021 ..
>> -rw-r--r--  1 root  wheel  uarch   73B Apr 24 11:11 instances
>> -rw-r--r--  1 root  wheel  uarch  160K Apr 24 13:33 replicator.db
>>
>> [13:33:58] mail18.scom.ca [root:0] /usr/local/var/lib/dovecot
>> #
>>
>> replicator.db seems to get updated ok but never processed properly.
>>
>> # sync.users
>> nick at elirpa.com                   high     00:09:41  463:47:01 -     y
>> keith at elirpa.com                  high     00:09:23  463:45:43 -     y
>> paul at scom.ca                      high     00:09:41  463:46:51 -     y
>> ed at scom.ca                        high     00:09:43  463:47:01 -     y
>> ed.hanna at dssmgmt.com              high     00:09:42  463:46:58 -     y
>> paul at paulkudla.net                high     00:09:44  463:47:03 580:35:07
>>     y
>>
>>
>>
>>
>> so ....
>>
>>
>>
>> two things :
>>
>> first to get the production stuff to work i had to write a script that
>> whould find the bad sync's and the force a dsync between the servers
>>
>> i run this every five minutes or each server.
>>
>> in crontab
>>
>> */10    *                *    *    *    root            /usr/bin/nohup
>> /programs/common/sync.recover > /dev/null
>>
>>
>> python script to sort things out
>>
>> # cat /programs/common/sync.recover
>> #!/usr/local/bin/python3
>>
>> #Force sync between servers that are reporting bad?
>>
>> import os,sys,django,socket
>> from optparse import OptionParser
>>
>>
>> from lib import *
>>
>> #Sample Re-Index MB
>> #doveadm -D force-resync -u paul at scom.ca -f INBOX*
>>
>>
>>
>> USAGE_TEXT = '''\
>> usage: %%prog %s[options]
>> '''
>>
>> parser = OptionParser(usage=USAGE_TEXT % '', version='0.4')
>>
>> parser.add_option("-m", "--send_to", dest="send_to", help="Send Email To")
>> parser.add_option("-e", "--email", dest="email_box", help="Box to Index")
>> parser.add_option("-d", "--detail",action='store_true',
>> dest="detail",default =False, help="Detailed report")
>> parser.add_option("-i", "--index",action='store_true',
>> dest="index",default =False, help="Index")
>>
>> options, args = parser.parse_args()
>>
>> print (options.email_box)
>> print (options.send_to)
>> print (options.detail)
>>
>> #sys.exit()
>>
>>
>>
>> print ('Getting Current User Sync Status')
>> command = commands("/usr/local/bin/doveadm replicator status '*'")
>>
>>
>> #print command
>>
>> sync_user_status = command.output.split('\n')
>>
>> #print sync_user_status
>>
>> synced = []
>>
>> for n in range(1,len(sync_user_status)) :
>>          user = sync_user_status[n]
>>          print ('Processing User : %s' %user.split(' ')[0])
>>          if user.split(' ')[0] != options.email_box :
>>                  if options.email_box != None :
>>                          continue
>>
>>          if options.index == True :
>>                  command = '/usr/local/bin/doveadm -D force-resync -u %s
>> -f INBOX*' %user.split(' ')[0]
>>                  command = commands(command)
>>                  command = command.output
>>
>>          #print user
>>          for nn in range (len(user)-1,0,-1) :
>>                  #print nn
>>                  #print user[nn]
>>
>>                  if user[nn] == '-' :
>>                          #print 'skipping ... %s' %user.split(' ')[0]
>>
>>                          break
>>
>>
>>
>>                  if user[nn] == 'y': #Found a Bad Mailbox
>>                          print ('syncing ... %s' %user.split(' ')[0])
>>
>>
>>                          if options.detail == True :
>>                                  command = '/usr/local/bin/doveadm -D
>> sync -u %s -d -N -l 30 -U' %user.split(' ')[0]
>>                                  print (command)
>>                                  command = commands(command)
>>                                  command = command.output.split('\n')
>>                                  print (command)
>>                                  print ('Processed Mailbox for ... %s'
>> %user.split(' ')[0] )
>>                                  synced.append('Processed Mailbox for ...
>> %s' %user.split(' ')[0])
>>                                  for nnn in range(len(command)):
>>                                          synced.append(command[nnn] + '\n')
>>                                  break
>>
>>
>>                          if options.detail == False :
>>                                  #command = '/usr/local/bin/doveadm -D
>> sync -u %s -d -N -l 30 -U' %user.split(' ')[0]
>>                                  #print (command)
>>                                  #command = os.system(command)
>>                                  command = subprocess.Popen(
>> ["/usr/local/bin/doveadm sync -u %s -d -N -l 30 -U" %user.split(' ')[0]
>> ], \
>>                                  shell = True, stdin=None, stdout=None,
>> stderr=None, close_fds=True)
>>
>>                                  print ( 'Processed Mailbox for ... %s'
>> %user.split(' ')[0] )
>>                                  synced.append('Processed Mailbox for ...
>> %s' %user.split(' ')[0])
>>                                  #sys.exit()
>>                                  break
>>
>> if len(synced) != 0 :
>>          #send email showing bad synced boxes ?
>>
>>          if options.send_to != None :
>>                  send_from = 'monitor at scom.ca'
>>                  send_to = ['%s' %options.send_to]
>>                  send_subject = 'Dovecot Bad Sync Report for : %s'
>> %(socket.gethostname())
>>                  send_text = '\n\n'
>>                  for n in range (len(synced)) :
>>                          send_text = send_text + synced[n] + '\n'
>>
>>                  send_files = []
>>                  sendmail (send_from, send_to, send_subject, send_text,
>> send_files)
>>
>>
>>
>> sys.exit()
>>
>> second :
>>
>> i posted this a month ago - no response
>>
>> please appreciate that i am trying to help ....
>>
>> after much testing i can now reporduce the replication issues at hand
>>
>> I am running on freebsd 12 & 13 stable (both test and production servers)
>>
>> sdram drives etc ...
>>
>> Basically replication works fine until reaching a folder quantity of ~
>> 256 or more
>>
>> to reproduce using doveadm i created folders like
>>
>> INBOX/folder-0
>> INBOX/folder-1
>> INBOX/folder-2
>> INBOX/folder-3
>> and so forth ......
>>
>> I created 200 folders and they replicated ok on both servers
>>
>> I created another 200 (400 total) and the replicator got stuck and would
>> not update the mbox on the alternate server anymore and is still
>> updating 4 days later ?
>>
>> basically replicator goes so far and either hangs or more likely bails
>> on an error that is not reported to the debug reporting ?
>>
>> however dsync will sync the two servers but only when run manually (ie
>> all the folders will sync)
>>
>> I have two test servers avaliable if you need any kind of access - again
>> here to help.
>>
>> [07:28:42] mail18.scom.ca [root:0] ~
>> # sync.status
>> Queued 'sync' requests        0
>> Queued 'high' requests        6
>> Queued 'low' requests         0
>> Queued 'failed' requests      0
>> Queued 'full resync' requests 0
>> Waiting 'failed' requests     0
>> Total number of known users   255
>>
>> username                       type        status
>> paul at scom.ca                   normal      Waiting for dsync to finish
>> keith at elirpa.com               incremental Waiting for dsync to finish
>> ed.hanna at dssmgmt.com           incremental Waiting for dsync to finish
>> ed at scom.ca                     incremental Waiting for dsync to finish
>> nick at elirpa.com                incremental Waiting for dsync to finish
>> paul at paulkudla.net             incremental Waiting for dsync to finish
>>
>>
>> i have been going through the c code and it seems the replication gets
>> requested ok
>>
>> replicator.db does get updated ok with the replicated request for the
>> mbox in question.
>>
>> however i am still looking for the actual replicator function in the
>> lib's that do the actual replication requests
>>
>> the number of folders & subfolders is defanately the issue - not the
>> mbox pyhsical size as thought origionally.
>>
>> if someone can point me in the right direction, it seems either the
>> replicator is not picking up on the number of folders to replicat
>> properly or it has a hard set limit like 256 / 512 / 65535 etc and stops
>> the replication request thereafter.
>>
>> I am mainly a machine code programmer from the 80's and have
>> concentrated on python as of late, 'c' i am starting to go through just
>> to give you a background on my talents.
>>
>> It took 2 months to finger this out.
>>
>> this issue also seems to be indirectly causing the duplicate messages
>> supression not to work as well.
>>
>> python programming to reproduce issue (loops are for last run started @
>> 200 - fyi) :
>>
>> # cat mbox.gen
>> #!/usr/local/bin/python2
>>
>> import os,sys
>>
>> from lib import *
>>
>>
>> user = 'paul at paulkudla.net'
>>
>> """
>> for count in range (0,600) :
>>          box = 'INBOX/folder-%s' %count
>>          print count
>>          command = '/usr/local/bin/doveadm mailbox create -s -u %s %s'
>> %(user,box)
>>          print command
>>          a = commands.getoutput(command)
>>          print a
>> """
>>
>> for count in range (0,600) :
>>          box = 'INBOX/folder-0/sub-%' %count
>>          print count
>>          command = '/usr/local/bin/doveadm mailbox create -s -u %s %s'
>> %(user,box)
>>          print command
>>          a = commands.getoutput(command)
>>          print a
>>
>>
>>
>>          #sys.exit()
>>
>>
>>
>>
>>
>> Happy Sunday !!!
>> Thanks - paul
>>
>> Paul Kudla
>>
>>
>> Scom.ca Internet Services <http://www.scom.ca>
>> 004-1009 Byron Street South
>> Whitby, Ontario - Canada
>> L1N 4S3
>>
>> Toronto 416.642.7266
>> Main 1.866.411.7266
>> Fax 1.888.892.7266
>>
>> On 4/24/2022 10:22 AM, Arnaud Abélard wrote:
>>> Hello,
>>>
>>> I am working on replicating a server (and adding compression on the
>>> other side) and since I had "Error: dsync I/O has stalled, no activity
>>> for 600 seconds (version not received)" errors I upgraded both source
>>> and destination server with the latest 2.3 version (2.3.18). While
>>> before the upgrade all the 15 replication connections were busy after
>>> upgrading dovecot replicator dsync-status shows that most of the time
>>> nothing is being replicated at all. I can see some brief replications
>>> that last, but 99,9% of the time nothing is happening at all.
>>>
>>> I have a replication_full_sync_interval of 12 hours but I have
>>> thousands of users with their last full sync over 90 hours ago.
>>>
>>> "doveadm replicator status" also shows that i have over 35,000 queued
>>> full resync requests, but no sync, high or low queued requests so why
>>> aren't the full requests occuring?
>>>
>>> There are no errors in the logs.
>>>
>>> Thanks,
>>>
>>> Arnaud
>>>
>>>
>>>
>>>
>>>
> 


More information about the dovecot mailing list