[dovecot] PATCH Add support for kqueue in ioloop subsystem

Dominic Marks dom at cus.org.uk
Wed Apr 16 22:33:14 EEST 2003


Hey,

I noticed that there was an ioloop "module" (if we can call it that) for
select and poll, and decided to add one for kqueue (aka kevent), the BSDs'
high-performance descriptor multiplexing API. I haven't done any of the
configure glue yet, but the code is complete and works well. kqueue is
available on all recent versions of FreeBSD, NetBSD, OpenBSD and Darwin
(and therefore Mac OS X). I've tested both pop3 and imap with no
problems.

To get it working: (bodge method)

> tar zxvf dovecot-0.99.8.1.tar.gz
> cp ioloop-kevent.c dovecot-0.99.8.1/src/lib/
> cd dovecot-0.99.8.1/
> ./configure
> vi config.h
   1. replace IOLOOP_POLL (or IOLOOP_SELECT) with IOLOOP_KEVENT
> vi src/lib/Makefile
   1. find liblib_a_SOURCES add ioloop-kevent.c to it
   2. find liblib_a_OBJECTS add ioloop-kevent.$(OBJEXT) to it
> make

There is a paper on kqueue which includes a performance evaluation here:

 http://people.freebsd.org/~jlemon/papers/kqueue.pdf

Thanks,
-- 
Dominic
 <dom at cus.org.uk> <d.marks at student.umist.ac.uk>
-------------- next part --------------
/*
 * ioloop-kevent.c : I/O loop handler using kevent(2)
 *
 *  Copyright (c) 2002, Dominic Marks <dom at cus.org.uk>
 *
 *  This code is placed in the public domain.
 */

#include "lib.h"
#include "ioloop-internal.h"

#ifdef IOLOOP_KEVENT

#include <sys/types.h>
#include <sys/time.h>
#include <sys/event.h>
#include <unistd.h>

#ifndef KEVENT_SET_SIZE
#  define KEVENT_SET_SIZE 16
#endif

/* Private state of the kevent(2) I/O loop backend, stored in
   ioloop->handler_data. */
struct ioloop_handler_data {
	int kq; /* kqueue descriptor, obtained from kqueue() at init time */
	struct kevent event; /* scratch change record; refilled with EV_SET()
				 before each add/remove kevent() call */
};

/*
 * Set up the kevent backend for `ioloop': allocate the handler state from
 * the loop's pool, open a kernel event queue and clear the scratch event
 * record. Failure to create the queue is reported through i_fatal().
 */
void io_loop_handler_init(struct ioloop *ioloop)
{
	struct ioloop_handler_data *hd;

	hd = p_new(ioloop->pool, struct ioloop_handler_data, 1);
	ioloop->handler_data = hd;

	hd->kq = kqueue();
	if (hd->kq < 0)
		i_fatal("couldn't initialise kqueue: %m");

	/* start from an all-zero change record */
	memset(&hd->event, 0, sizeof(hd->event));
}

void io_loop_handler_deinit(struct ioloop *ioloop)
{
	p_free(ioloop->pool, ioloop->handler_data);
}

/*
 * Start watching `fd' for the conditions given in `condition' (a mask of
 * IO_READ and/or IO_WRITE).
 *
 * kevent filters (EVFILT_READ, EVFILT_WRITE) are separate filter
 * identifiers, not bit flags, so they cannot be OR'ed into a single
 * registration: each requested condition is registered with its own
 * kevent() call. A failed registration is reported as a warning.
 */
void io_loop_handle_add(struct ioloop *ioloop, int fd, int condition)
{
	struct ioloop_handler_data *data = ioloop->handler_data;

	i_assert(fd >= 0);

	if (condition & IO_READ) {
		EV_SET(&data->event, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
		if (kevent(data->kq, &data->event, 1, NULL, 0, NULL) < 0)
			i_warning("couldn't add filter with kqueue: %m");
	}

	if (condition & IO_WRITE) {
		EV_SET(&data->event, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
		if (kevent(data->kq, &data->event, 1, NULL, 0, NULL) < 0)
			i_warning("couldn't add filter with kqueue: %m");
	}
}

/*
 * Stop watching `fd' for the conditions given in `condition' (a mask of
 * IO_READ and/or IO_WRITE).
 *
 * kevent filters (EVFILT_READ, EVFILT_WRITE) are separate filter
 * identifiers, not bit flags, so each requested condition is removed with
 * its own EV_DELETE kevent() call.
 *
 * Failures are deliberately only warnings. Changing this to i_fatal is
 * debatable, however if you do so you will potentially expose the case
 * where a process receives the client from a socket, then closes the
 * listening socket, experiences an error and calls exit, dropping the
 * client we just picked up.
 */
void io_loop_handle_remove(struct ioloop *ioloop, int fd, int condition)
{
	struct ioloop_handler_data *data = ioloop->handler_data;

	i_assert(fd >= 0);

	if (condition & IO_READ) {
		EV_SET(&data->event, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
		if (kevent(data->kq, &data->event, 1, NULL, 0, NULL) < 0)
			i_warning("couldn't remove filter with kqueue: %m");
	}

	if (condition & IO_WRITE) {
		EV_SET(&data->event, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
		if (kevent(data->kq, &data->event, 1, NULL, 0, NULL) < 0)
			i_warning("couldn't remove filter with kqueue: %m");
	}
}

void io_loop_handler_run(struct ioloop *ioloop)
{
	struct ioloop_handler_data *data = ioloop->handler_data;
        struct timeval tv;
	struct timespec ts;
	struct io *io, *next;
	struct kevent kes[KEVENT_SET_SIZE], *kev;
	unsigned int t_id;
	int ret, id, mark;

        /* get the time left for next timeout task */
	io_loop_get_wait_time(ioloop->timeouts, &tv, NULL);

	/* convert struct timeval into struct timespec */
	TIMEVAL_TO_TIMESPEC(&tv, &ts);

	/* zero the event vector */
	memset(kes, 0, sizeof(struct kevent) * KEVENT_SET_SIZE);

	/* get any waiting kevents */
	ret = kevent(data->kq, NULL, 0, kes, KEVENT_SET_SIZE, &ts);
	if (ret < 0 && errno != EINTR)
		i_warning("kevent processing failed: %m");

	/* execute timeout handlers */
        io_loop_handle_timeouts(ioloop);

	if (ret <= 0 || !ioloop->running) {
                /* no I/O events */
		return;
	}

	/* execute the I/O handlers in prioritized order */
	for (io = ioloop->ios; io != NULL && ret > 0; io = next) {
		next = io->next;

		if (io->destroyed) {
			/* we were destroyed, and io->fd points to
			   -1 now, so we can't know if there was any
			   revents left. */
			io_destroy(ioloop, io);
			continue;
		}

		i_assert(io->fd >= 0);

		mark = 0;
		/*
		 * XXX
		 * This approach has its upsides and downsides. Because
		 * of the way kevent(2) works you can't interogate the
		 * kernel for the status of an arbitary descriptor, you
		 * can however retrieve all the waiting descriptors. This
		 * is great if you don't care what order they are processed
		 * in. However here we have to process them in io handler
		 * priority order, which means we then need to go through
		 * our events and see if we have a match.
		 *
		 * Removing processed events from the list would improve
		 * the situation, however for small values of KEVENT_SET_SIZE
		 * it would not be a significant optimization, and since we
		 * pass this routine regularly the number of returned events
		 * is typically going to be small.
		 */
		for (id = 0; id < ret; id++) {
			kev = &kes[id];
			if (kev->ident == io->fd) {
				if (io->condition & (IO_READ | IO_WRITE)) {
					if (!((kev->filter & EVFILT_READ) ||
						(kev->filter & EVFILT_WRITE))) {
						continue;
					}
				}
				if (io->condition & IO_READ) {
					if (!(kev->filter & EVFILT_READ))
						continue;
				}
				if (io->condition & IO_WRITE) {
					if (!(kev->filter & EVFILT_WRITE))
						continue;
				}
				mark = 1;
			}
		}
		if (mark == 0) continue; /* no condition was satisfied */

		t_id = t_push();
		io->callback(io->context);
		if (t_pop() != t_id)
			i_panic("Leaked a t_pop() call!");

		if (io->destroyed)
			io_destroy(ioloop, io);

		ret --;
	}
}

#endif


More information about the dovecot mailing list