[dovecot-cvs] dovecot/src/lib-storage/index index-mail-headers.c,NONE,1.1 Makefile.am,1.11,1.12 index-mail.c,1.16,1.17 index-mail.h,1.6,1.7

cras at procontrol.fi cras at procontrol.fi
Mon Aug 18 07:24:39 EEST 2003


Update of /home/cvs/dovecot/src/lib-storage/index
In directory danu:/tmp/cvs-serv24519/lib-storage/index

Modified Files:
	Makefile.am index-mail.c index-mail.h 
Added Files:
	index-mail-headers.c 
Log Message:
Changed how cached headers are stored. Previously when we added new headers,
all the old headers were duplicated as well which wasted space. Also fixed
several bugs in caching.



--- NEW FILE: index-mail-headers.c ---
/* Copyright (C) 2003 Timo Sirainen */

/*
   Headers are stored in 1-4 pieces. There's a list of header names that each
   piece contains, so if a piece doesn't actually contain some listed header,
   it's known not to exist in the mail at all.

   Header name lists are stored in sorted order, so we can use binary
   searching.

   We have to be able to do 3 things:
    - Get value for one header
    - Get a list of headers, possibly containing more than requested
    - Save some of the uncached headers into cache

   First is easy. Second means that we have to store the wanted headers in
   a single string which we can directly return.

   Third is a bit tricky if we want to avoid parsing and copying the data
   uselessly. It's possible if we want to cache all requested uncached
   headers. That should be the common case, so I'll optimize for that.
   Another even more common case is that everything is already cached. So:

   - If we request only cached headers, parse them and copy only wanted
     headers to header_data.
   - If we request a non-cached header, trash the header_data and all
     pointers to it. Copy all cached headers to the beginning of it and save
     a marker where it ends.
   - If we again request single cached header, we'll have to parse the
     header_data up to the marker again.
   - When saving the uncached headers, we know that they all come after the
     marker. If we want to save them all, it's directly there in a string.
     Otherwise we have to parse them and copy the wanted headers, but it's
     still less work.
*/

#include "lib.h"
#include "istream.h"
#include "buffer.h"
#include "str.h"
#include "message-date.h"
#include "imap-envelope.h"
#include "index-storage.h"
#include "index-mail.h"

#include <stdlib.h>

/* Bookkeeping for one header name tracked for the current mail. Pointers to
   these structs are kept in mail->data.headers, sorted by name with
   strcasecmp() so they can be binary searched. */
struct cached_header {
	/* header name; always compared case-insensitively */
	const char *name;
	/* offset of this header's value inside header_data. 0 doubles as
	   "no value seen (yet)", see index_mail_get_header() */
	size_t value_idx; /* in header_data */

	/* set by index_mail_parse_header_init() for headers covered by the
	   parse that is about to run */
	unsigned int parsing:1;
	/* set once a parse that covered this header reached end of headers;
	   the header is then not looked up or appended again */
	unsigned int fully_saved:1;
};

/* Binary search mail->data.headers for `name' (case-insensitive).

   Returns the matching entry, or NULL when the name isn't tracked. If idx_r
   is non-NULL it receives the index of the match, or, on a miss, the index
   where the name has to be inserted to keep the array sorted. */
static struct cached_header *
cached_header_find(struct index_mail *mail, const char *name,
		   unsigned int *idx_r)
{
	struct cached_header **entries;
	size_t bytes;
	unsigned int lo, hi, mid;
	int cmp;

	entries = buffer_get_modifyable_data(mail->data.headers, &bytes);

	lo = 0;
	hi = bytes / sizeof(struct cached_header *);

	while (lo < hi) {
		mid = (lo + hi) / 2;

		cmp = strcasecmp(entries[mid]->name, name);
		if (cmp == 0) {
			if (idx_r != NULL)
				*idx_r = mid;
			return entries[mid];
		}

		if (cmp < 0)
			lo = mid + 1;
		else
			hi = mid;
	}

	/* not found. lo == hi is the insertion position */
	if (idx_r != NULL)
		*idx_r = lo;
	return NULL;
}

/* Return the tracked entry for `name', creating it first if needed.

   New entries are allocated from mail->pool (the name is duplicated there as
   well) and inserted at the position reported by cached_header_find(), so
   the array stays sorted. */
static struct cached_header *
cached_header_add(struct index_mail *mail, const char *name)
{
	struct cached_header *entry;
	unsigned int pos;

	entry = cached_header_find(mail, name, &pos);
	if (entry == NULL) {
		entry = p_new(mail->pool, struct cached_header, 1);
		entry->name = p_strdup(mail->pool, name);

		/* the buffer stores pointers, not the structs themselves */
		buffer_insert(mail->data.headers, pos * sizeof(entry),
			      &entry, sizeof(entry));
	}
	return entry;
}

/* qsort() comparator for an array of C strings: p1 and p2 point to the array
   elements (i.e. to char pointers), which are compared case-insensitively. */
static int strcasecmp_p(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return strcasecmp(*lhs, *rhs);
}

/* Return a NULL-terminated copy of the NULL-terminated string array `arr',
   sorted case-insensitively with strcasecmp().

   Only the pointers are copied, the strings themselves are shared with
   `arr'. The copy lives in a buffer created from data_stack_pool, so the
   caller decides its lifetime with its own t_push()/t_pop() frame. */
static const char *const *sort_array(const char *const *arr)
{
	static const char *null = NULL;
	buffer_t *buffer;
	const char **data;
	int i, already_sorted;

	/* copy the wanted_headers array */
	buffer = buffer_create_dynamic(data_stack_pool, 256, (size_t)-1);
	already_sorted = TRUE;
	for (i = 0; arr[i] != NULL; i++) {
		/* an element that sorts before its predecessor means the
		   input is out of order. (the check used to be inverted,
		   which left unsorted input unsorted.) */
		if (i > 0 && already_sorted &&
		    strcasecmp(arr[i], arr[i-1]) < 0)
			already_sorted = FALSE;
		buffer_append(buffer, &arr[i], sizeof(const char *));
	}
	buffer_append(buffer, &null, sizeof(const char *));

	/* and sort it. i is the number of strings, excluding the
	   terminating NULL */
	data = buffer_get_modifyable_data(buffer, NULL);
	if (!already_sorted)
		qsort(data, i, sizeof(const char *), strcasecmp_p);
	return data;
}

/* Find the lowest-numbered cache header group that lists every name in
   `wanted_headers'.

   Groups are scanned from the highest index downwards; groups without a
   header list are skipped. The scan stops at the first group that misses
   one of the wanted names and returns the last group that matched.
   Returns -1 if wanted_headers is NULL or empty, or if no group matched.

   Both the wanted list (sorted here) and the group's list are walked in
   sorted order, so one linear pass per group is enough.

   NOTE(review): a wanted list containing the same name twice looks like it
   can never match, because a group name is consumed by its first match -
   confirm whether callers may pass duplicates. */
static int find_wanted_headers(struct mail_cache *cache,
			       const char *const wanted_headers[])
{
	const char *const *group, *const *want;
	int i, found, cmp;

	if (wanted_headers == NULL || *wanted_headers == NULL)
		return -1;

	wanted_headers = sort_array(wanted_headers);

	found = -1;
	i = MAIL_CACHE_HEADERS_COUNT;
	while (i-- > 0) {
		group = mail_cache_get_header_fields(cache, i);
		if (group == NULL)
			continue;

		want = wanted_headers;
		while (*group != NULL) {
			cmp = strcasecmp(*want, *group);
			if (cmp < 0) {
				/* the group is already past the wanted
				   name, so it's not listed here */
				break;
			}
			if (cmp == 0 && *++want == NULL) {
				/* everything was found */
				break;
			}
			group++;
		}

		if (*want != NULL)
			return found;

		/* find the minimum matching header number */
		found = i;
	}

	return found;
}

/* Like find_wanted_headers(), but for this particular mail: starting from
   the group find_wanted_headers() returned, pick the first group whose
   field bit is set in mail->data.cached_fields. Returns the group index,
   or -1 if there is none. */
static int mail_find_wanted_headers(struct index_mail *mail,
				    const char *const wanted_headers[])
{
	int idx;

	idx = find_wanted_headers(mail->ibox->index->cache, wanted_headers);
	if (idx >= 0) {
		while (idx < MAIL_CACHE_HEADERS_COUNT) {
			if ((mail->data.cached_fields &
			     mail_cache_header_fields[idx]) != 0)
				return idx;
			idx++;
		}
	}

	return -1;
}

/* Build a NULL-terminated array of the names of all tracked headers, in the
   (sorted) order of mail->data.headers. The array is created from
   data_stack_pool; the name strings themselves are not copied. */
static const char *const *cached_header_get_names(struct index_mail *mail)
{
	const struct cached_header **entries;
	const char *terminator = NULL;
	buffer_t *names;
	size_t idx, count;

	entries = buffer_get_modifyable_data(mail->data.headers, &count);
	count /= sizeof(struct cached_header *);

	names = buffer_create_dynamic(data_stack_pool, 128, (size_t)-1);
	idx = 0;
	while (idx < count) {
		buffer_append(names, &entries[idx]->name,
			      sizeof(const char *));
		idx++;
	}
	buffer_append(names, &terminator, sizeof(const char *));

	return buffer_get_data(names, NULL);
}

/* Called at end of headers: every header that was flagged as being parsed
   is now completely known, so move it from "parsing" to "fully_saved". */
static void cached_headers_mark_fully_saved(struct index_mail *mail)
{
	struct cached_header **entries, *entry;
	size_t idx, count;

	entries = buffer_get_modifyable_data(mail->data.headers, &count);
	count /= sizeof(struct cached_header *);

	for (idx = 0; idx < count; idx++) {
		entry = entries[idx];
		if (!entry->parsing)
			continue;

		entry->parsing = FALSE;
		entry->fully_saved = TRUE;
	}
}

/* Prepare for a header parse: make sure header_data exists and flag the
   tracked headers that the upcoming parse is going to cover.

   headers == NULL means that all headers are parsed, so every tracked
   header is flagged. Otherwise `headers' is a NULL-terminated list of names
   and only the tracked headers found in it are flagged. Flagged headers
   become fully_saved when the parse reaches end of headers. */
void index_mail_parse_header_init(struct index_mail *mail,
				  const char *const headers[])
{
	struct cached_header **data;
	size_t i, size;
	int cmp;

	if (mail->data.header_data == NULL)
		mail->data.header_data = str_new(mail->pool, 4096);

	data = buffer_get_modifyable_data(mail->data.headers, &size);
	size /= sizeof(struct cached_header *);

	if (headers == NULL) {
		/* parsing all headers */
		for (i = 0; i < size; i++)
			data[i]->parsing = TRUE;
	} else {
		/* both lists are sorted, so walk them in parallel. only the
		   side that compares smaller may be advanced; advancing both
		   (as was done before) skipped tracked headers whenever
		   `headers' contained a name that isn't tracked. */
		t_push();
		headers = sort_array(headers);
		i = 0;
		while (i < size && *headers != NULL) {
			cmp = strcasecmp(*headers, data[i]->name);
			if (cmp > 0) {
				/* tracked header isn't in the list */
				i++;
			} else {
				if (cmp == 0)
					data[i]->parsing = TRUE;
				headers++;
			}
		}
		t_pop();
	}
}

/* Header callback given to message_parse_header(). It's called with each
   parsed header line in `hdr', and with hdr == NULL at the end of headers.
   `context' is the struct index_mail being filled.

   Depending on the flags in mail->data this feeds the IMAP envelope parser,
   extracts the sent date from the "Date" header, and records the values of
   tracked headers that aren't fully saved yet. */
void index_mail_parse_header(struct message_part *part __attr_unused__,
			     struct message_header_line *hdr, void *context)
{
	struct index_mail *mail = context;
	struct index_mail_data *data = &mail->data;
	struct cached_header *cached_hdr;

	if (data->save_envelope) {
		/* hdr is passed on as-is, also when it's NULL */
		imap_envelope_parse_header(mail->pool,
					   &data->envelope_data, hdr);

		if (hdr == NULL) {
			/* finalize the envelope */
			string_t *str;

			str = str_new(mail->pool, 256);
			imap_envelope_write_part_data(data->envelope_data, str);
			data->envelope = str_c(str);
		}
	}

	if (hdr == NULL) {
		/* end of headers */
		if (data->save_sent_date) {
			/* not found */
			data->sent_date.time = 0;
			data->sent_date.timezone = 0;
			data->save_sent_date = FALSE;
		}
		/* (time_t)-1 means the sent date isn't known at all, so
		   there's nothing to add to cache */
		if (data->sent_date.time != (time_t)-1) {
			index_mail_cache_add(mail, MAIL_CACHE_SENT_DATE,
					     &data->sent_date,
					     sizeof(data->sent_date));
		}

		cached_headers_mark_fully_saved(mail);
		return;
	}

	if (data->save_sent_date && strcasecmp(hdr->name, "Date") == 0) {
		/* the header continues in following lines: ask for the full
		   value and parse the date only once the last line is seen */
		if (hdr->continues)
			hdr->use_full_value = TRUE;
		else {
			if (!message_date_parse(hdr->full_value,
						hdr->full_value_len,
						&data->sent_date.time,
						&data->sent_date.timezone)) {
				/* 0 == parse error */
				data->sent_date.time = 0;
				data->sent_date.timezone = 0;
			}
			data->save_sent_date = FALSE;
		}
	}

	/* only tracked headers that aren't completely known yet are saved */
	cached_hdr = cached_header_find(mail, hdr->name, NULL);
	if (cached_hdr != NULL && !cached_hdr->fully_saved) {
		if (data->header_stream == NULL) {
			/* append "Name: value\n" to header_data. the name is
			   written only for the first line of the header */
			if (!hdr->continued) {
				str_append(data->header_data, hdr->name);
				str_append(data->header_data, ": ");
			}
			/* remember where the first value of this header
			   begins. 0 == not set yet */
			if (cached_hdr->value_idx == 0) {
				cached_hdr->value_idx =
					str_len(data->header_data);
			}
			str_append_n(data->header_data,
				     hdr->value, hdr->value_len);
			if (!hdr->no_newline)
				str_append(data->header_data, "\n");
		} else {
			/* it's already in header_data. */
			/* header_stream is set by parse_cached_headers()
			   while it's reading header_data itself, so only the
			   position is recorded.
			   NOTE(review): this assumes v_offset points to the
			   beginning of the value at this point - confirm
			   against message_parse_header(). */
			if (cached_hdr->value_idx == 0) {
				cached_hdr->value_idx =
					data->header_stream->v_offset;
			}
		}
	}
}

/* Returns TRUE if newly parsed headers may be saved to cache for this mail.

   That's not possible when the mail already uses the last header group, or
   when the cache transaction can't be started. Note that a successful call
   leaves the cache transaction open. */
static int index_mail_can_cache_headers(struct index_mail *mail)
{
	const int last_group = MAIL_CACHE_HEADERS_COUNT-1;

	if ((mail->data.cached_fields &
	     mail_cache_header_fields[last_group]) != 0) {
		/* all headers used */
		return FALSE;
	}

	/* FIXME: add some smart checks here. we don't necessarily want to
	   cache everything.. */

	return index_mail_cache_transaction_begin(mail) ? TRUE : FALSE;
}

/* Forget the value positions that no longer exist in header_data: every
   value_idx at or past the current end of header_data is reset to 0
   (== not set). Meant to be called right after header_data was truncated. */
static void cached_headers_clear_values(struct index_mail *mail)
{
	struct cached_header **entries;
	size_t idx, count, data_end;

	data_end = str_len(mail->data.header_data);
	entries = buffer_get_modifyable_data(mail->data.headers, &count);
	count /= sizeof(struct cached_header *);

	idx = 0;
	while (idx < count) {
		if (entries[idx]->value_idx >= data_end)
			entries[idx]->value_idx = 0;
		idx++;
	}
}

/* Parse headers that are stored in cache so that the tracked headers get
   their value_idx set. `idx' is the cache header group to parse.

   If the group has already been copied into header_data, header_data itself
   is re-read (nothing is appended, only positions are recorded). Otherwise
   the group's string is looked up from cache and the wanted headers are
   appended to header_data.

   Returns FALSE if the cached string was expected to exist but didn't,
   TRUE otherwise. */
static int parse_cached_headers(struct index_mail *mail, int idx)
{
	struct index_mail_data *data = &mail->data;
	struct istream *istream;
	const char *str, *const *idx_headers;

	/* the input stream is allocated from data stack, free it when
	   leaving */
	t_push();
	if (idx < data->header_data_cached) {
		/* it's already in header_data. */
		istream = i_stream_create_from_data(data_stack_pool,
						    str_data(data->header_data),
						    str_len(data->header_data));
		/* we might be parsing a bit more.. */
		idx = data->header_data_cached-1;
		/* tells index_mail_parse_header() to record stream offsets
		   instead of appending to header_data */
		data->header_stream = istream;
	} else {
		str = mail_cache_lookup_string_field(
			mail->ibox->index->cache, data->rec,
			mail_cache_header_fields[idx]);
		if (str == NULL) {
			/* broken - we expected the header to exist */
			t_pop();
			return FALSE;
		}

		/* header_data is going to contain only the wanted headers
		   of this group, not the whole group */
		data->header_data_cached_partial = TRUE;
		istream = i_stream_create_from_data(data_stack_pool,
						    str, strlen(str));
	}

	/* only the headers listed for this group can be marked fully saved
	   by this parse */
	idx_headers = mail_cache_get_header_fields(mail->ibox->index->cache,
						   idx);
	i_assert(idx_headers != NULL);

	index_mail_parse_header_init(mail, idx_headers);
	message_parse_header(NULL, istream, NULL,
			     index_mail_parse_header, mail);

	/* must be cleared before istream is released */
	data->header_stream = NULL;
	i_stream_unref(istream);
	t_pop();

	return TRUE;
}

/* Parse the headers from the mail's own stream.

   If the headers haven't been fully parsed before and caching is possible,
   header_data is first arranged so that all already cached header groups
   are at its beginning and everything parsed now comes after
   header_data_uncached_offset. index_mail_headers_close() saves that tail
   into cache later (header_save is set).

   Returns FALSE if the stream couldn't be opened or the cached headers
   couldn't be parsed, TRUE otherwise. */
int index_mail_parse_headers(struct index_mail *mail)
{
	struct mail_cache *cache = mail->ibox->index->cache;
	struct index_mail_data *data = &mail->data;
	const char *str, *const *headers;
	int idx, max;

	if (!index_mail_open_stream(mail, 0))
		return FALSE;

	if (mail->data.header_data == NULL)
		mail->data.header_data = str_new(mail->pool, 4096);

	/* can_cache_headers() locks the cache file. it must be done before
	   we can expect cached headers to stay the same. it's not a good idea
	   to cache some headers twice because of race conditions.. */
	if (!data->header_fully_parsed && index_mail_can_cache_headers(mail)) {
		if (data->header_data_cached_partial) {
			/* too difficult to handle efficiently, trash it */
			data->header_data_cached_partial = FALSE;
			data->header_data_cached =
				data->header_data_cached_contiguous;

			/* drop everything after the contiguous cached part
			   and forget the positions that pointed there */
			str_truncate(data->header_data,
				     data->header_data_uncached_offset);
			cached_headers_clear_values(mail);
		}

		/* add all cached headers to beginning of header_data */
		/* max tracks the highest group that is in header_data after
		   the loop. it stays at header_data_cached-1 if nothing new
		   is found */
                idx = data->header_data_cached; max = idx-1;
		for (; idx < MAIL_CACHE_HEADERS_COUNT; idx++) {
			str = mail_cache_lookup_string_field(cache, data->rec,
						mail_cache_header_fields[idx]);
			if (str == NULL)
				continue;

			max = idx;
			str_append(mail->data.header_data, str);
		}
		data->header_data_cached = max+1;
		/* the marker: everything appended after this is uncached */
		data->header_data_uncached_offset =
			str_len(mail->data.header_data);

		/* make sure we cache everything */
		/* find the highest group that has a header list. idx is
		   left at -1 (and headers at NULL) if there is none */
		for (idx = MAIL_CACHE_HEADERS_COUNT-1; idx >= 0; idx--) {
			headers = mail_cache_get_header_fields(cache, idx);
			if (headers != NULL)
				break;
		}

		/* start tracking every header listed in that group */
		if (headers != NULL) {
			while (*headers != NULL) {
				cached_header_add(mail, *headers);
				headers++;
			}
		}

		if (max >= 0) {
			/* now we'll have to set value_idx for all headers that
			   are already cached */
			if (!parse_cached_headers(mail, max))
				return FALSE;
		}

		/* index_mail_headers_close() does the actual saving */
		data->header_save = TRUE;
		data->header_save_idx = idx;
	}

	/* parse all headers from the message itself. this also gives us
	   the header size */
	index_mail_parse_header_init(mail, NULL);
	message_parse_header(NULL, data->stream, &data->hdr_size,
			     index_mail_parse_header, mail);
	data->parse_header = FALSE;
	data->hdr_size_set = TRUE;
	data->header_fully_parsed = TRUE;

	return TRUE;
}

/* Return the value of header `field', or NULL if no value is known for it
   (also when parsing the cached headers fails).

   The header is added to the tracked headers. Unless it's already fully
   saved, it's read from cache when some cached group contains it, and
   otherwise by parsing the message headers. The returned string is the
   first line of the value, cut at '\n' with t_strcut(). */
const char *index_mail_get_header(struct mail *_mail, const char *field)
{
	struct index_mail *mail = (struct index_mail *) _mail;
	struct cached_header *hdr;
	const char *wanted[2];
	int cache_idx = -1;

	hdr = cached_header_add(mail, field);
	if (!hdr->fully_saved) {
		/* if we need to parse the header anyway, don't bother with
		   the cache */
		if (!mail->data.parse_header) {
			wanted[0] = field;
			wanted[1] = NULL;
			cache_idx = mail_find_wanted_headers(mail, wanted);

			if (cache_idx >= 0 &&
			    !parse_cached_headers(mail, cache_idx))
				return NULL;
		}

		if (cache_idx < 0) {
			index_mail_parse_headers(mail);

			/* might have been moved in memory, get it again */
			hdr = cached_header_find(mail, field, NULL);
		}
	}

	if (hdr->value_idx == 0)
		return NULL;

	return t_strcut(str_c(mail->data.header_data) + hdr->value_idx, '\n');
}

struct istream *index_mail_get_headers(struct mail *_mail,
				       const char *const minimum_fields[])
{
	struct index_mail *mail = (struct index_mail *) _mail;
	struct index_mail_data *data = &mail->data;
	struct cached_header *hdr;
	const char *const *tmp, *str;
	int i, idx, all_saved;

	i_assert(*minimum_fields != NULL);

	if (mail->data.header_data == NULL)
		mail->data.header_data = str_new(mail->pool, 4096);

	idx = mail_find_wanted_headers(mail, minimum_fields);
	if (idx >= 0) {
		/* copy from cache to header_data */
		for (i = data->header_data_cached; i <= idx; i++) {
			str = mail_cache_lookup_string_field(
					mail->ibox->index->cache, data->rec,
					mail_cache_header_fields[i]);
			if (str == NULL)
				continue;

			str_append(data->header_data, str);
		}
		data->header_data_cached = idx+1;
		if (!data->header_data_cached_partial) {
			data->header_data_uncached_offset =
				str_len(data->header_data);
			data->header_data_cached_contiguous = idx+1;
		}
	} else {
		/* it's not cached yet - see if we have them parsed */
		all_saved = TRUE;
		for (tmp = minimum_fields; *tmp != NULL; tmp++) {
			hdr = cached_header_add(mail, *tmp);
			if (!hdr->fully_saved)
				all_saved = FALSE;
		}

		if (!all_saved)
			index_mail_parse_headers(mail);
	}

	return i_stream_create_from_data(mail->pool,
					 str_data(data->header_data),
					 str_len(data->header_data));
}

/* Precalculate mail->wanted_headers_idx: the cache header group that
   contains the wanted headers and, if the IMAP envelope is wanted, the
   envelope headers.

   -2 is used for "wasn't looked up", -1 for "not found in any group". The
   result is -1 if either lookup failed, otherwise the larger of the two
   values. */
void index_mail_headers_init(struct index_mail *mail)
{
	struct mail_cache *cache = mail->ibox->index->cache;
	int hdr_idx = -2, env_idx = -2;

	if (mail->wanted_headers != NULL && *mail->wanted_headers != NULL)
		hdr_idx = find_wanted_headers(cache, mail->wanted_headers);

	/* no point in looking up the envelope headers if the wanted
	   headers already failed */
	if (hdr_idx != -1 && (mail->wanted_fields & MAIL_FETCH_IMAP_ENVELOPE))
		env_idx = find_wanted_headers(cache, imap_envelope_headers);

	if (hdr_idx == -1 || env_idx == -1)
		mail->wanted_headers_idx = -1;
	else if (hdr_idx > env_idx)
		mail->wanted_headers_idx = hdr_idx;
	else
		mail->wanted_headers_idx = env_idx;
}

/* Per-mail initialization: create the tracked headers array, add the
   headers we know are going to be wanted (wanted_headers, the envelope
   headers, or "Date" when the sent date is wanted but not known yet), and
   set data->parse_header if they can't all be found from this mail's
   cached header groups. */
void index_mail_headers_init_next(struct index_mail *mail)
{
	struct index_mail_data *data = &mail->data;
	const char *const *name;
	int idx;

	mail->data.headers = buffer_create_dynamic(mail->pool, 64, (size_t)-1);

	idx = mail->wanted_headers_idx;
	if (mail->wanted_headers != NULL) {
		for (name = mail->wanted_headers; *name != NULL; name++)
			cached_header_add(mail, *name);
	}

	if (mail->wanted_fields & MAIL_FETCH_IMAP_ENVELOPE) {
		for (name = imap_envelope_headers; *name != NULL; name++)
			cached_header_add(mail, *name);
	} else if ((mail->wanted_fields & MAIL_FETCH_DATE) &&
		   data->sent_date.time == (time_t)-1) {
		const char *date_only[] = { "Date", NULL };

		cached_header_add(mail, "Date");
		if (idx != -1) {
			/* see if it's cached */
			idx = mail_find_wanted_headers(mail, date_only);
		}
	}

	/* See if we're going to have to parse the header */
	if (idx == -2) {
		/* no headers were wanted at all */
		return;
	}

	/* skip the groups that this mail doesn't have in cache */
	while (idx >= 0 && idx < MAIL_CACHE_HEADERS_COUNT &&
	       (data->cached_fields & mail_cache_header_fields[idx]) == 0)
		idx++;

	if (idx < 0 || idx >= MAIL_CACHE_HEADERS_COUNT)
		data->parse_header = TRUE;
}

/* Return the index of the first cache header group that has no header list
   yet, or -1 if all MAIL_CACHE_HEADERS_COUNT groups are in use. */
static int find_unused_header_idx(struct mail_cache *cache)
{
	int idx = 0;

	while (idx < MAIL_CACHE_HEADERS_COUNT) {
		if (mail_cache_get_header_fields(cache, idx) == NULL)
			return idx;
		idx++;
	}

	return -1;
}

/* Save the headers parsed by index_mail_parse_headers() into cache. Does
   nothing unless header_save is set.

   If some group already lists all tracked headers, that group is used.
   Otherwise the tracked header names are stored as the list of the first
   unused group. The saved data is the part of header_data after
   header_data_uncached_offset, including the terminating NUL.

   If there's no unused group left, or the header list can't be set,
   nothing is saved and header_save stays set. */
void index_mail_headers_close(struct index_mail *mail)
{
	struct index_mail_data *data = &mail->data;
	const char *uncached, *const *names;
	size_t uncached_len;
	int idx;

	if (!data->header_save)
		return;

	/* FIXME: this breaks if fetch_uid() and fetch/search are both
	   accessing headers from same message. index_mails should probably be
	   shared.. */
	names = cached_header_get_names(mail);
	idx = find_wanted_headers(mail->ibox->index->cache, names);
	if (idx < 0) {
		/* there's some new headers */
		idx = find_unused_header_idx(mail->ibox->index->cache);
		if (idx < 0)
			return;

		if (!mail_cache_set_header_fields(mail->ibox->trans_ctx,
						  idx, names))
			return;
	} else {
		/* all headers found */
		i_assert(idx == mail->data.header_save_idx);
	}

	uncached = str_c(mail->data.header_data) +
		data->header_data_uncached_offset;
	uncached_len = str_len(mail->data.header_data) -
		data->header_data_uncached_offset;

	/* +1 saves the terminating NUL as well */
	mail_cache_add(mail->ibox->trans_ctx, data->rec,
		       mail_cache_header_fields[idx], uncached,
		       uncached_len+1);
	data->header_save = FALSE;
}

Index: Makefile.am
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/Makefile.am,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- Makefile.am	7 Aug 2003 09:56:59 -0000	1.11
+++ Makefile.am	18 Aug 2003 03:24:37 -0000	1.12
@@ -14,6 +14,7 @@
 	index-expunge.c \
 	index-fetch.c \
 	index-mail.c \
+	index-mail-headers.c \
 	index-mailbox-check.c \
 	index-messageset.c \
 	index-search.c \

Index: index-mail.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/index-mail.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -d -r1.16 -r1.17
--- index-mail.c	13 Aug 2003 22:24:32 -0000	1.16
+++ index-mail.c	18 Aug 2003 03:24:37 -0000	1.17
@@ -1,16 +1,13 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2003 Timo Sirainen */
 
 #include "lib.h"
 #include "buffer.h"
 #include "istream.h"
 #include "str.h"
-#include "message-address.h"
 #include "message-date.h"
 #include "message-part-serialize.h"
 #include "imap-bodystructure.h"
 #include "imap-envelope.h"
-#include "mail-index.h"
-#include "mail-index-util.h"
 #include "mail-custom-flags.h"
 #include "mail-cache.h"
 #include "index-storage.h"
@@ -32,8 +29,6 @@
 		return NULL;
 	}
 
-	// FIXME: for non-multipart messages we could build it
-
 	if (!mail_cache_lookup_field(mail->ibox->index->cache, mail->data.rec,
 				     MAIL_CACHE_MESSAGEPART,
 				     &part_data, &part_size)) {
@@ -126,7 +121,7 @@
 	}
 }
 
-static int index_mail_cache_transaction_begin(struct index_mail *mail)
+int index_mail_cache_transaction_begin(struct index_mail *mail)
 {
 	if (mail->ibox->trans_ctx != NULL)
 		return TRUE;
@@ -159,9 +154,8 @@
 	return TRUE;
 }
 
-static void index_mail_cache_add(struct index_mail *mail,
-				 enum mail_cache_field field,
-				 const void *data, size_t size)
+void index_mail_cache_add(struct index_mail *mail, enum mail_cache_field field,
+			  const void *data, size_t size)
 {
 	struct index_mailbox *ibox = mail->ibox;
 
@@ -175,7 +169,7 @@
 	mail->data.cached_fields |= field;
 }
 
-static int open_stream(struct index_mail *mail, uoff_t position)
+int index_mail_open_stream(struct index_mail *mail, uoff_t position)
 {
 	struct index_mail_data *data = &mail->data;
 	int deleted;
@@ -200,269 +194,6 @@
 	return TRUE;
 }
 
-static int find_wanted_headers(struct mail_cache *cache,
-			       const char *const wanted_headers[])
-{
-	const char *const *headers, *const *tmp, *const *tmp2;
-	int i;
-
-	if (wanted_headers == NULL || *wanted_headers == NULL)
-		return -1;
-
-	for (i = MAIL_CACHE_HEADERS_COUNT-1; i >= 0; i--) {
-		headers = mail_cache_get_header_fields(cache, i);
-		if (headers == NULL)
-			continue;
-
-		for (tmp = wanted_headers; *tmp != NULL; tmp++) {
-			for (tmp2 = headers; *tmp2 != NULL; tmp2++) {
-				if (strcasecmp(*tmp2, *tmp) == 0)
-					break;
-			}
-
-			if (*tmp2 == NULL)
-				break;
-		}
-
-		if (*tmp == NULL)
-			return i;
-	}
-
-	return -1;
-}
-
-static struct cached_header *
-find_cached_header(struct index_mail *mail, const char *name, size_t len)
-{
-	struct cached_header *hdr;
-
-	for (hdr = mail->data.headers; hdr != NULL; hdr = hdr->next) {
-		if (len == hdr->name_len &&
-		    memcasecmp(hdr->name, name, len) == 0)
-			return hdr;
-	}
-
-	return NULL;
-}
-
-void index_mail_parse_header(struct message_part *part __attr_unused__,
-			     struct message_header_line *hdr, void *context)
-{
-	struct index_mail *mail = context;
-	struct index_mail_data *data = &mail->data;
-	struct cached_header *cached_hdr;
-
-	if (data->save_envelope) {
-		imap_envelope_parse_header(mail->pool,
-					   &data->envelope_data, hdr);
-
-		if (hdr == NULL) {
-			/* finalize the envelope */
-			string_t *str;
-
-			str = str_new(mail->pool, 256);
-			imap_envelope_write_part_data(data->envelope_data, str);
-			data->envelope = str_c(str);
-		}
-	}
-
-	if (hdr == NULL) {
-		/* end of headers */
-		if (data->save_sent_date) {
-			/* not found */
-			data->sent_date.time = 0;
-			data->sent_date.timezone = 0;
-			data->save_sent_date = FALSE;
-		}
-		if (data->sent_date.time != (time_t)-1) {
-			index_mail_cache_add(mail, MAIL_CACHE_SENT_DATE,
-					     &data->sent_date,
-					     sizeof(data->sent_date));
-		}
-
-		/* mark parsed headers as fully saved */
-                cached_hdr = data->headers;
-		for (; cached_hdr != NULL; cached_hdr = cached_hdr->next) {
-			if (cached_hdr->parsing) {
-				cached_hdr->parsing = FALSE;
-				cached_hdr->fully_saved = TRUE;
-			}
-		}
-		return;
-	}
-
-	if (data->save_sent_date && strcasecmp(hdr->name, "Date") == 0) {
-		if (hdr->continues) {
-			hdr->use_full_value = TRUE;
-			return;
-		}
-		if (!message_date_parse(hdr->full_value, hdr->full_value_len,
-					&data->sent_date.time,
-					&data->sent_date.timezone)) {
-			/* 0 == parse error */
-			data->sent_date.time = 0;
-			data->sent_date.timezone = 0;
-		}
-		data->save_sent_date = FALSE;
-	}
-
-	cached_hdr = find_cached_header(mail, hdr->name, hdr->name_len);
-	if (cached_hdr != NULL && !cached_hdr->fully_saved) {
-		if (!hdr->continued) {
-			str_append(data->header_data, hdr->name);
-			str_append(data->header_data, ": ");
-		}
-		if (cached_hdr->value_idx == 0)
-			cached_hdr->value_idx = str_len(data->header_data);
-		str_append_n(data->header_data, hdr->value, hdr->value_len);
-		if (!hdr->no_newline)
-			str_append(data->header_data, "\n");
-	}
-}
-
-static struct cached_header *
-add_cached_header(struct index_mail *mail, const char *name)
-{
-	struct cached_header *hdr;
-
-	i_assert(*name != '\0');
-
-	hdr = find_cached_header(mail, name, strlen(name));
-	if (hdr != NULL)
-		return hdr;
-
-	hdr = p_new(mail->pool, struct cached_header, 1);
-	hdr->name = p_strdup(mail->pool, name);
-	hdr->name_len = strlen(name);
-
-	hdr->next = mail->data.headers;
-	mail->data.headers = hdr;
-
-	return hdr;
-}
-
-static const char *const *get_header_names(struct cached_header *hdr)
-{
-	const char *null = NULL;
-	buffer_t *buffer;
-
-	buffer = buffer_create_dynamic(data_stack_pool, 128, (size_t)-1);
-	for (; hdr != NULL; hdr = hdr->next)
-		buffer_append(buffer, &hdr->name, sizeof(const char *));
-	buffer_append(buffer, &null, sizeof(const char *));
-
-	return buffer_get_data(buffer, NULL);
-}
-
-static int find_unused_header_idx(struct mail_cache *cache)
-{
-	int i;
-
-	for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) {
-		if (mail_cache_get_header_fields(cache, i) == NULL)
-			return i;
-	}
-	return -1;
-}
-
-void index_mail_parse_header_init(struct index_mail *mail,
-				  const char *const *headers)
-{
-	struct cached_header *hdr;
-	const char *const *tmp;
-
-	if (mail->data.header_data == NULL)
-		mail->data.header_data = str_new(mail->pool, 4096);
-
-	if (headers == NULL) {
-		/* parsing all headers */
-		for (hdr = mail->data.headers; hdr != NULL; hdr = hdr->next)
-			hdr->parsing = TRUE;
-	} else {
-		for (hdr = mail->data.headers; hdr != NULL; hdr = hdr->next) {
-			for (tmp = headers; *tmp != NULL; tmp++) {
-				if (strcasecmp(*tmp, hdr->name) == 0)
-					hdr->parsing = TRUE;
-			}
-		}
-	}
-}
-
-static int parse_header(struct index_mail *mail)
-{
-	struct mail_cache *cache = mail->ibox->index->cache;
-	const char *const *headers, *const *tmp;
-	int idx;
-
-	if (!open_stream(mail, 0))
-		return FALSE;
-
-	if (mail->data.save_cached_headers) {
-		/* we want to save some of the headers. that means we'll have
-		   to save all the headers in that group. if we're creating a
-		   new group, save all the headers in previous group in it
-		   too. */
-		idx = mail->data.save_header_idx;
-		if (idx < 0) {
-			/* can we reuse existing? */
-			headers = get_header_names(mail->data.headers);
-			idx = find_wanted_headers(cache, headers);
-			if (idx >= 0)
-				mail->data.save_header_idx = idx;
-		}
-		if (idx < 0) {
-			idx = find_unused_header_idx(cache);
-			idx--; /* include all previous headers too */
-		}
-
-		headers = idx < 0 ? NULL :
-			mail_cache_get_header_fields(cache, idx);
-
-		if (headers != NULL) {
-			for (tmp = headers; *tmp != NULL; tmp++)
-				add_cached_header(mail, *tmp);
-		}
-	}
-
-	index_mail_parse_header_init(mail, NULL);
-	message_parse_header(NULL, mail->data.stream, &mail->data.hdr_size,
-			     index_mail_parse_header, mail);
-	mail->data.parse_header = FALSE;
-	mail->data.headers_read = TRUE;
-	mail->data.hdr_size_set = TRUE;
-
-	return TRUE;
-}
-
-static int parse_cached_header(struct index_mail *mail, int idx)
-{
-	struct istream *istream;
-	const char *str, *const *idx_headers;
-
-	idx_headers = mail_cache_get_header_fields(mail->ibox->index->cache,
-						   idx);
-	i_assert(idx_headers != NULL);
-
-	str = mail_cache_lookup_string_field(mail->ibox->index->cache,
-					     mail->data.rec,
-					     mail_cache_header_fields[idx]);
-	if (str == NULL)
-		return FALSE;
-
-	t_push();
-	istream = i_stream_create_from_data(data_stack_pool, str, strlen(str));
-	index_mail_parse_header_init(mail, idx_headers);
-	message_parse_header(NULL, istream, NULL,
-			     index_mail_parse_header, mail);
-
-	i_stream_unref(istream);
-	t_pop();
-
-	if (idx == mail->data.header_idx)
-		mail->data.headers_read = TRUE;
-	return TRUE;
-}
-
 static const struct mail_full_flags *get_flags(struct mail *_mail)
 {
 	struct index_mail *mail = (struct index_mail *) _mail;
@@ -496,7 +227,7 @@
 			return data->parts;
 	}
 
-	if (!open_stream(mail, 0))
+	if (!index_mail_open_stream(mail, 0))
 		return NULL;
 
 	data->parts = message_parse(mail->pool, data->stream,
@@ -551,10 +282,9 @@
 
 static time_t get_date(struct mail *_mail, int *timezone)
 {
-	static const char *date_headers[] = { "Date", NULL };
 	struct index_mail *mail = (struct index_mail *) _mail;
 	struct index_mail_data *data = &mail->data;
-	int idx;
+	const char *str;
 
 	if (data->sent_date.time != (time_t)-1) {
 		if (timezone != NULL)
@@ -566,20 +296,20 @@
 		get_cached_sent_date(mail, &data->sent_date);
 
 	if (data->sent_date.time == (time_t)-1) {
-		idx = data->parse_header ? -1 :
-			find_wanted_headers(mail->ibox->index->cache,
-					    date_headers);
-
 		data->save_sent_date = TRUE;
-		if (idx >= 0) {
-			if (!parse_cached_header(mail, idx))
-				idx = -1;
+		str = _mail->get_header(_mail, "Date");
+		if (data->sent_date.time == (time_t)-1) {
+			if (!message_date_parse(str, (size_t)-1,
+						&data->sent_date.time,
+						&data->sent_date.timezone)) {
+				/* 0 == parse error */
+				data->sent_date.time = 0;
+				data->sent_date.timezone = 0;
+			}
+			index_mail_cache_add(mail, MAIL_CACHE_SENT_DATE,
+					     &data->sent_date,
+					     sizeof(data->sent_date));
 		}
-		if (idx < 0)
-			parse_header(mail);
-
-		index_mail_cache_add(mail, MAIL_CACHE_SENT_DATE,
-				     &data->sent_date, sizeof(data->sent_date));
 	}
 
 	if (timezone != NULL)
@@ -666,8 +396,9 @@
 	}
 
 	if (!get_msgpart_sizes(mail)) {
+		/* this gives us header size for free */
 		if (data->parse_header)
-			parse_header(mail);
+			index_mail_parse_headers(mail);
 	}
 
 	hdr_size = data->hdr_size_set ?
@@ -695,7 +426,8 @@
 	/* have to parse, slow.. */
 	hdr_phys_size = hdr_size != (uoff_t)-1 && data->hdr_size_set ?
 		data->hdr_size.physical_size : (uoff_t)-1;
-	if (!open_stream(mail, hdr_phys_size != (uoff_t)-1 ? hdr_phys_size : 0))
+	if (!index_mail_open_stream(mail, hdr_phys_size != (uoff_t)-1 ?
+				    hdr_phys_size : 0))
 		return (uoff_t)-1;
 
 	if (hdr_phys_size == (uoff_t)-1) {
@@ -717,97 +449,6 @@
 	return data->size;
 }
 
-static const char *get_header(struct mail *_mail, const char *field)
-{
-	struct index_mail *mail = (struct index_mail *) _mail;
-	struct cached_header *hdr;
-	size_t field_len;
-	int idx;
-
-	field_len = strlen(field);
-	hdr = find_cached_header(mail, field, field_len);
-	if (hdr == NULL) {
-		/* not wanted initially, add it and check if we can
-		   get it from cache */
-		const char *headers[2];
-
-		hdr = add_cached_header(mail, field);
-
-		headers[0] = field; headers[1] = NULL;
-		idx = find_wanted_headers(mail->ibox->index->cache, headers);
-	} else {
-		idx = mail->data.header_idx;
-	}
-
-	if (!hdr->fully_saved) {
-		if (idx >= 0) {
-			if (!parse_cached_header(mail, idx))
-				idx = -1;
-		}
-		if (idx < 0) {
-			mail->data.save_cached_headers = TRUE;
-			parse_header(mail);
-		}
-
-		hdr = find_cached_header(mail, field, field_len);
-	}
-
-	return hdr->value_idx == 0 ? NULL :
-		t_strcut(str_c(mail->data.header_data) + hdr->value_idx, '\n');
-}
-
-static struct istream *get_headers(struct mail *_mail,
-				   const char *const minimum_fields[])
-{
-	struct index_mail *mail = (struct index_mail *) _mail;
-	struct cached_header *hdr;
-	const char *const *tmp, *str;
-	int idx, all_exists, all_saved;
-
-	i_assert(*minimum_fields != NULL);
-
-	all_exists = all_saved = TRUE;
-	for (tmp = minimum_fields; *tmp != NULL; tmp++) {
-		hdr = find_cached_header(mail, *tmp, strlen(*tmp));
-		if (hdr == NULL) {
-			add_cached_header(mail, *tmp);
-			all_exists = FALSE;
-		} else if (!hdr->fully_saved)
-			all_saved = FALSE;
-	}
-
-	if (all_exists) {
-		if (all_saved) {
-			return i_stream_create_from_data(mail->pool,
-					str_data(mail->data.header_data),
-					str_len(mail->data.header_data));
-		}
-
-		idx = mail->data.header_idx;
-	} else {
-		idx = find_wanted_headers(mail->ibox->index->cache,
-					  get_header_names(mail->data.headers));
-	}
-
-	if (idx >= 0) {
-		/* everything should be cached */
-		str = mail_cache_lookup_string_field(mail->ibox->index->cache,
-				mail->data.rec, mail_cache_header_fields[idx]);
-		if (str != NULL) {
-			return i_stream_create_from_data(mail->pool,
-							 str, strlen(str));
-		}
-	}
-	if (idx < 0) {
-		mail->data.save_cached_headers = TRUE;
-		parse_header(mail);
-	}
-
-	return i_stream_create_from_data(mail->pool,
-					 str_data(mail->data.header_data),
-					 str_len(mail->data.header_data));
-}
-
 static struct istream *get_stream(struct mail *_mail,
 				  struct message_size *hdr_size,
 				  struct message_size *body_size)
@@ -815,7 +456,7 @@
 	struct index_mail *mail = (struct index_mail *) _mail;
 	struct index_mail_data *data = &mail->data;
 
-	if (!open_stream(mail, 0))
+	if (!index_mail_open_stream(mail, 0))
 		return NULL;
 
 	if (hdr_size != NULL || body_size != NULL) {
@@ -858,7 +499,6 @@
 	struct mail_cache *cache = mail->ibox->index->cache;
 	enum mail_cache_field cache_field;
 	char *str;
-	int i, idx;
 
 	switch (field) {
 	case MAIL_FETCH_IMAP_BODY:
@@ -894,7 +534,7 @@
 			return data->body;
 		}
 
-		if (!open_stream(mail, 0))
+		if (!index_mail_open_stream(mail, 0))
 			return NULL;
 
 		if (data->parts == NULL)
@@ -922,34 +562,8 @@
 		if (data->envelope != NULL)
 			return data->envelope;
 
-		if (data->parse_header) {
-			data->save_envelope = TRUE;
-			parse_header(mail);
-			return data->envelope;
-		}
-
-		if (data->save_envelope) {
-			/* it was in wanted_fields, header_idx should be
-			   correct */
-			idx = data->header_idx;
-			i_assert(idx >= 0);
-		} else {
-			idx = find_wanted_headers(cache, imap_envelope_headers);
-		}
-
 		data->save_envelope = TRUE;
-		if (idx >= 0) {
-			if (!parse_cached_header(mail, idx))
-				idx = -1;
-		}
-		if (idx < 0) {
-			for (i = 0; imap_envelope_headers[i] != NULL; i++) {
-				add_cached_header(mail,
-						  imap_envelope_headers[i]);
-			}
-			data->save_cached_headers = TRUE;
-			parse_header(mail);
-		}
+		(void)_mail->get_header(_mail, "Date");
 		return data->envelope;
 	default:
 		i_unreached();
@@ -965,8 +579,8 @@
 	get_received_date,
 	get_date,
 	get_size,
-	get_header,
-	get_headers,
+	index_mail_get_header,
+	index_mail_get_headers,
 	get_stream,
 	get_special,
 	index_storage_update_flags,
@@ -981,54 +595,24 @@
 	mail->mail = index_mail;
 	mail->mail.box = &ibox->box;
 
-	mail->wanted_headers_idx =
-		find_wanted_headers(ibox->index->cache, wanted_headers);
-
 	mail->pool = pool_alloconly_create("index_mail", 16384);
 	mail->ibox = ibox;
 	mail->wanted_fields = wanted_fields;
 	mail->wanted_headers = wanted_headers;
 	mail->expunge_counter = ibox->index->expunge_counter;
 
+	index_mail_headers_init(mail);
+
 	if (ibox->mail_init != NULL)
 		ibox->mail_init(mail);
 }
 
 static void index_mail_close(struct index_mail *mail)
 {
-	struct index_mail_data *data = &mail->data;
-	const char *const *headers;
-
-	if (data->stream != NULL)
-		i_stream_unref(data->stream);
-
-	if (!data->save_cached_headers || !data->headers_read)
-		return;
-
-	/* save cached headers - FIXME: this breaks if fetch_uid() and
-	   fetch/search are both accessing headers from same message.
-	   index_mails should probably be shared.. */
-	if (!index_mail_cache_transaction_begin(mail))
-		return;
-
-	if (data->save_header_idx < 0) {
-		data->save_header_idx =
-			find_unused_header_idx(mail->ibox->index->cache);
-		if (data->save_header_idx < 0)
-			return;
-
-                headers = get_header_names(data->headers);
-		if (!mail_cache_set_header_fields(mail->ibox->trans_ctx,
-						  data->save_header_idx,
-						  headers))
-			return;
-	}
+	if (mail->data.stream != NULL)
+		i_stream_unref(mail->data.stream);
 
-	mail_cache_add(mail->ibox->trans_ctx, data->rec,
-		       mail_cache_header_fields[data->save_header_idx],
-		       str_c(mail->data.header_data),
-		       str_len(mail->data.header_data)+1);
-	data->save_cached_headers = FALSE;
+	index_mail_headers_close(mail);
 }
 
 int index_mail_next(struct index_mail *mail, struct mail_index_record *rec,
@@ -1037,7 +621,7 @@
 	struct mail_index *index = mail->ibox->index;
 	struct index_mail_data *data = &mail->data;
         enum mail_index_record_flag index_flags;
-	int i, ret, open_mail, only_wanted_headers;
+	int ret, open_mail;
 
 	i_assert(mail->expunge_counter == index->expunge_counter);
 
@@ -1097,51 +681,10 @@
 					  MAIL_FETCH_STREAM_BODY))
 		open_mail = TRUE;
 
-	/* check headers */
-	if (mail->wanted_fields & MAIL_FETCH_IMAP_ENVELOPE) {
-		for (i = 0; imap_envelope_headers[i] != NULL; i++) 
-			add_cached_header(mail, imap_envelope_headers[i]);
-	} else if ((mail->wanted_fields & MAIL_FETCH_DATE) &&
-		   data->sent_date.time == (time_t)-1)
-		add_cached_header(mail, "Date");
-
-	only_wanted_headers = mail->data.headers == NULL;
-	if (mail->wanted_headers != NULL) {
-		const char *const *tmp;
-
-		for (tmp = mail->wanted_headers; *tmp != NULL; tmp++)
-			add_cached_header(mail, *tmp);
-	}
-
-	data->save_header_idx = -1;
-	if (data->headers != NULL &&
-	    (mail->wanted_fields & (MAIL_FETCH_STREAM_HEADER |
-				    MAIL_FETCH_STREAM_BODY)) == 0) {
-		/* we're not explicitly opening the file, caching the headers
-		   could be a good idea if they're not already cached */
-		if (only_wanted_headers) {
-			/* no extra headers, we already know if it's indexed */
-			data->header_idx = mail->wanted_headers_idx;
-		} else {
-			const char *const *headers;
-
-			headers = get_header_names(data->headers);
-			data->header_idx =
-				find_wanted_headers(index->cache, headers);
-		}
-		if (data->header_idx == -1 ||
-		    (data->cached_fields &
-		     mail_cache_header_fields[data->header_idx]) == 0) {
-			data->save_cached_headers = TRUE;
-			data->parse_header = TRUE;
-			data->save_header_idx = data->header_idx;
-		}
-	} else {
-		data->header_idx = -1;
-	}
+        index_mail_headers_init_next(mail);
 
 	if ((open_mail || data->parse_header) && !delay_open) {
-		if (!open_stream(mail, 0))
+		if (!index_mail_open_stream(mail, 0))
 			ret = data->deleted ? 0 : -1;
 		else
 			ret = 1;

Index: index-mail.h
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/index-mail.h,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- index-mail.h	6 Aug 2003 20:15:33 -0000	1.6
+++ index-mail.h	18 Aug 2003 03:24:37 -0000	1.7
@@ -6,17 +6,6 @@
 
 struct message_header_line;
 
-struct cached_header {
-	struct cached_header *next;
-
-	size_t name_len;
-	const char *name;
-	size_t value_idx; /* in header_data */
-
-	unsigned int parsing:1;
-	unsigned int fully_saved:1;
-};
-
 struct index_mail_data {
 	struct mail_full_flags flags;
 	time_t date, received_date;
@@ -25,9 +14,12 @@
 	enum mail_cache_field cached_fields;
 	struct mail_sent_date sent_date;
 
-	struct cached_header *headers;
+	buffer_t *headers;
 	string_t *header_data;
-	int header_idx, save_header_idx;
+	int header_data_cached, header_data_cached_contiguous;
+	size_t header_data_uncached_offset;
+	struct istream *header_stream;
+	int header_save_idx;
 
 	struct message_part *parts;
 	const char *envelope, *body, *bodystructure;
@@ -40,13 +32,14 @@
         struct message_size hdr_size, body_size;
 
 	unsigned int parse_header:1;
-	unsigned int headers_read:1;
-	unsigned int save_cached_headers:1;
-	unsigned int save_sent_date:1;
 	unsigned int save_envelope:1;
+	unsigned int save_sent_date:1;
 	unsigned int hdr_size_set:1;
 	unsigned int body_size_set:1;
 	unsigned int deleted:1;
+	unsigned int header_data_cached_partial:1;
+	unsigned int header_fully_parsed:1;
+	unsigned int header_save:1;
 };
 
 struct index_mail {
@@ -71,8 +64,23 @@
 void index_mail_deinit(struct index_mail *mail);
 
 void index_mail_parse_header_init(struct index_mail *mail,
-				  const char *const *headers);
+				  const char *const headers[]);
 void index_mail_parse_header(struct message_part *part,
 			     struct message_header_line *hdr, void *context);
+
+int index_mail_cache_transaction_begin(struct index_mail *mail);
+void index_mail_cache_add(struct index_mail *mail, enum mail_cache_field field,
+			  const void *data, size_t size);
+
+int index_mail_open_stream(struct index_mail *mail, uoff_t position);
+int index_mail_parse_headers(struct index_mail *mail);
+
+void index_mail_headers_init(struct index_mail *mail);
+void index_mail_headers_init_next(struct index_mail *mail);
+void index_mail_headers_close(struct index_mail *mail);
+
+const char *index_mail_get_header(struct mail *_mail, const char *field);
+struct istream *index_mail_get_headers(struct mail *_mail,
+				       const char *const minimum_fields[]);
 
 #endif



More information about the dovecot-cvs mailing list