/* IMAP uses two ways to access messages. Message sequence numbers (MSN) and unique identifiers (UID). Whenever message is expunged, all the MSNs after it will be decreased by one. The problem is how to handle message expunges efficiently so that we minimize disk I/O at expunge time, but also keep MSN and UID lookups fast later. Easiest way to do this would be to simply compress away the expunged records by moving non-expunged records over them. This would keep MSN lookups always as a simple array lookup, but it might also mean a lot of disk I/O when expunging old messages in a large mailbox. What we do is build a binary tree of the expunged records. The binary tree records are stored over the expunged records, so they don't take any extra space. Each node in the btree contains the amount of expunge-nodes on it's left. That means that the next unexpunged record's MSN is the current_record_index+1 - left_node_count. By traversing the expunge tree we can find the wanted MSN pretty fast. UID lookups would be easiest to do using a binary search which just skips the expunged nodes. However nearly always we want the UID's MSN returned as well, so it gets slightly more complex. What we do is to traverse the expunge tree again and find the two nodes which contain our wanted UID between them, after that we'll do a binary search between them. MSN is again index+1 - left_node_count. The expunged records should still be compressed away once in a while. Something like when 10% of file contains expunged records, or when a lookup has to traverse through more than 10 nodes. There's several optimizations that can be done with this. All MSN lookups up until first expunged message are a simple array lookup. Expunging last few messages should rather be directly compressed instead of adding to expunge tree since they both require pretty much the same amount of disk I/O. The expunge tree should preferrably be somewhat balanced, but currently we don't do that. The balancing should anyway be done so that the last messages are the fastest to find, first messages the next fastest and messages in the middle the slowest. */ #include #include #include #include #define TRUE 1 #define FALSE 0 #define bufsize 12345 static size_t page_size = 4096; struct mail_index { unsigned char *buf; size_t size, used; unsigned int expunge_root_idx, count; }; struct mail_index_record { unsigned int flags; unsigned int uid; unsigned int offset; }; struct mail_index_expunge_record { unsigned int nodes_left; unsigned int left_idx; unsigned int right_idx; }; /*union mail_index_union_record { struct mail_index_record rec; struct mail_index_expunge_record erec; };*/ #define MAIL_FLAG_EXPUNGED (1U << 31) static void index_set_corrupted(struct mail_index *index, const char *fmt, ...) { va_list va; va_start(va, fmt); vfprintf(stderr, fmt, va); fprintf(stderr, "\n"); va_end(va); abort(); } static int remove_record(struct mail_index *index, unsigned int idx) { struct mail_index_record *rec = (struct mail_index_record *) index->buf; struct mail_index_expunge_record *erec, *new_erec; unsigned int cur_idx, idx_limit; idx_limit = index->size / sizeof(struct mail_index_record); assert(idx < idx_limit); assert((rec[idx].flags & MAIL_FLAG_EXPUNGED) == 0); index->count--; #if 0 if (index->size < page_size*2 || idx * sizeof(struct mail_index_record) >= index->size - page_size - (index->size % page_size)) { /* if it's only two pageful, just rewrite them */ /* FIXME: it may have expunged records already which we need to compress as well */ memmove(&rec[idx], &rec[idx+1], index->size - (idx+1) * sizeof(struct mail_index_record)); index->size -= sizeof(struct mail_index_record); return TRUE; } #endif new_erec = (struct mail_index_expunge_record *) &rec[idx]; if (idx > index->expunge_root_idx) { /* keep the root node always the highest */ erec = (struct mail_index_expunge_record *) &rec[index->expunge_root_idx]; new_erec->nodes_left = erec->nodes_left + 1; new_erec->left_idx = index->expunge_root_idx; new_erec->right_idx = (unsigned int)-1; index->expunge_root_idx = idx; return TRUE; } new_erec->nodes_left = MAIL_FLAG_EXPUNGED; new_erec->left_idx = (unsigned int)-1; new_erec->right_idx = (unsigned int)-1; if (index->expunge_root_idx == (unsigned int)-1) { index->expunge_root_idx = idx; return TRUE; } cur_idx = index->expunge_root_idx; for (;;) { if (cur_idx >= idx_limit) { index_set_corrupted(index, "Expunge node points outside index (%u >= %u)", idx, idx_limit); return FALSE; } erec = (struct mail_index_expunge_record *) &rec[cur_idx]; if (idx < cur_idx) { /* move left */ erec->nodes_left++; if (erec->left_idx == (unsigned int)-1) { erec->left_idx = idx; break; } cur_idx = erec->left_idx; } else { /* move right */ if (erec->right_idx == (unsigned int)-1) { erec->right_idx = idx; break; } cur_idx = erec->right_idx; } } return TRUE; } static struct mail_index_record * get_next(struct mail_index *index, unsigned int idx) { struct mail_index_record *rec = (struct mail_index_record *) index->buf; unsigned int idx_limit; idx_limit = index->size / sizeof(struct mail_index_record); if (idx >= idx_limit) { /* last one */ return NULL; } while (++idx < idx_limit) { if ((rec[idx].flags & MAIL_FLAG_EXPUNGED) == 0) return rec + idx; } return NULL; } static struct mail_index_record * get_prev(struct mail_index *index, unsigned int idx) { struct mail_index_record *rec, *first; if (idx == 0) return NULL; first = (struct mail_index_record *) index->buf; rec = ((struct mail_index_record *) index->buf) + idx - 1; while ((rec->flags & MAIL_FLAG_EXPUNGED) != 0) { if (rec == first) return NULL; rec--; } return rec; } static struct mail_index_record * get_nth_record(struct mail_index *index, unsigned int seq) { struct mail_index_expunge_record *erec; struct mail_index_record *rec, *rec_p; unsigned int idx, idx_limit, left_count, next_seq, cur_nodes_left; assert(seq != 0); if (seq > index->count) return NULL; rec_p = (struct mail_index_record *) index->buf; if (index->expunge_root_idx == (unsigned int)-1) return rec_p + (seq-1); idx_limit = index->size / sizeof(struct mail_index_record); idx = index->expunge_root_idx; left_count = 0; for (;;) { if (idx >= idx_limit) { index_set_corrupted(index, "Expunge node points outside index (%u >= %u)", idx, idx_limit); return NULL; } if ((rec_p[idx].flags & MAIL_FLAG_EXPUNGED) == 0) { index_set_corrupted(index, "Expunge node doesn't have EXPUNGE flag set"); return NULL; } erec = (struct mail_index_expunge_record *) &rec_p[idx]; cur_nodes_left = erec->nodes_left & 0x7fffffff; if (cur_nodes_left > idx + left_count) { index_set_corrupted(index, "Expunge left node counters corrupted " "(%u > %u + %u)", cur_nodes_left, idx, left_count); return NULL; } next_seq = idx - (left_count + cur_nodes_left) + 1; if (seq > next_seq) { /* it's right from us */ if (erec->right_idx == (unsigned int)-1) { rec = get_next(index, idx); return rec + (seq - next_seq); } if (erec->right_idx <= idx) { index_set_corrupted(index, "Invalid right expunge node (%u <= %u)", erec->right_idx, idx); return NULL; } left_count += cur_nodes_left + 1; idx = erec->right_idx; } else if (seq < next_seq) { /* it's left from us */ if (erec->left_idx == (unsigned int)-1) { rec = get_prev(index, idx); return rec - (next_seq - seq - 1); } if (erec->left_idx >= idx) { index_set_corrupted(index, "Invalid left expunge node (%u >= %u)", erec->left_idx, idx); return NULL; } idx = erec->left_idx; } else { rec = get_next(index, idx); if (rec == NULL) { index_set_corrupted(index, "No records found after idx %u", idx); } return rec; } } } static struct mail_index_record * get_uid_range(struct mail_index *index, unsigned int uid1, unsigned int uid2, unsigned int *seq_r) { struct mail_index_expunge_record *erec; struct mail_index_record *rec_p, *rec; unsigned int idx_limit, idx, left_idx, right_idx, nodes_left; unsigned int left_next_idx, right_prev_idx; assert(uid1 <= uid2); if (seq_r != NULL) *seq_r = 0; rec_p = (struct mail_index_record *) index->buf; idx_limit = index->size / sizeof(struct mail_index_record); idx = index->expunge_root_idx; nodes_left = 0; left_idx = right_idx = (unsigned int)-1; while (idx != (unsigned int)-1) { if (idx >= idx_limit) { index_set_corrupted(index, "Expunge node points outside index (%u >= %u)", idx, idx_limit); return NULL; } erec = (struct mail_index_expunge_record *) &rec_p[idx]; rec = get_prev(index, idx); if (rec == NULL || rec->uid < uid1) { left_idx = idx; nodes_left += (erec->nodes_left & 0x7fffffff) + 1; idx = erec->right_idx; } else { right_idx = idx; right_prev_idx = (unsigned int) (rec - rec_p); idx = erec->left_idx; } } if (left_idx == (unsigned int)-1) left_next_idx = 0; else { rec = get_next(index, left_idx); if (rec == NULL || rec->uid > uid2) return NULL; left_next_idx = (unsigned int) (rec - rec_p); } if (right_idx == (unsigned int)-1) right_prev_idx = idx_limit-1; /* FIXME: use binary search here */ for (idx = left_next_idx; idx <= right_prev_idx; idx++) { if ((rec->flags & MAIL_FLAG_EXPUNGED) != 0) { index_set_corrupted(index, "Unexpectedly found expunged record"); return NULL; } if (rec_p[idx].uid >= uid1) { if (rec_p[idx].uid > uid2) break; if (seq_r != NULL) *seq_r = idx + 1 - nodes_left; return rec_p + idx; } } return NULL; } static struct mail_index_record * get_uid_range_slow(struct mail_index *index, unsigned int uid1, unsigned int uid2, unsigned int *seq_r) { struct mail_index_record *rec = (struct mail_index_record *) index->buf; unsigned int seq, idx, idx_limit; idx_limit = index->size / sizeof(struct mail_index_record); seq = 0; for (idx = 0; idx < idx_limit; idx++) { if ((rec[idx].flags & MAIL_FLAG_EXPUNGED) == 0) { seq++; if (rec[idx].uid >= uid1) { if (rec[idx].uid > uid2) break; *seq_r = seq; return rec+idx; } } } *seq_r = 0; return NULL; } int main(int argc, char *argv[]) { struct mail_index index = { 0 }; struct mail_index_record *rec_p; unsigned int seq1, seq2, uid1, uid2; size_t i, j; index.expunge_root_idx = (unsigned int)-1; index.size = bufsize * sizeof(struct mail_index_record); printf("count = %u, malloc = %lu\n", bufsize, index.size); index.buf = calloc(1, index.size); rec_p = (struct mail_index_record *) index.buf; for (i = 0; i < bufsize; i++) rec_p[i].uid = i+1; index.count = bufsize; for (i = 0; i < bufsize-1; i++) { rec_p = get_nth_record(&index, rand() % index.count + 1); remove_record(&index, rec_p - (struct mail_index_record *) index.buf); for (j = 0; j < 100; j++) { seq1 = rand() % index.count + 1; uid1 = get_nth_record(&index, seq1)->uid; uid2 = get_uid_range(&index, uid1, uid1, &seq2)->uid; if (uid1 != uid2 || seq1 != seq2) { printf("seq %u -> uid %u, uid -> seq %u, uid %u\n", seq1, uid1, seq2, uid2); get_uid_range(&index, uid1, uid1, &seq2)->uid; abort(); } } for (j = 0; j < 100; j++) { uid1 = rand() % bufsize + 1; uid2 = rand() % (bufsize - uid1) + uid1; rec_p = get_uid_range(&index, uid1, uid2, &seq1); if (rec_p != NULL) { uid1 = rec_p->uid; uid2 = get_nth_record(&index, seq1)->uid; if (uid1 != uid2) { printf("uid: %u vs %u, seq = %u\n", uid1, uid2, seq1); abort(); } } rec_p = get_uid_range_slow(&index, uid1, uid2, &seq2); if (seq1 != seq2) { printf("slow way %u..%u: %u vs %u, uid = %u\n", uid1, uid2, seq1, seq2, rec_p->uid); get_uid_range(&index, uid1, uid2, &seq1); abort(); } } } assert(index.count == 1); return 0; }