[Dovecot] [PATCH] [RFC] epoll based ioloop handler (now with patch)

Andrey Panin pazke at donpac.ru
Mon Aug 30 16:10:30 EEST 2004


Sorry, I forgot to attach the patch to my previous mail.

-- 
Andrey Panin		| Linux and UNIX system administrator
pazke at donpac.ru		| PGP key: wwwkeys.pgp.net
-------------- next part --------------
diff -urpNX /usr/share/dontdiff dovecot-cvs.vanilla/configure.in dovecot-cvs/configure.in
--- dovecot-cvs.vanilla/configure.in	2004-08-09 23:14:02.000000000 +0400
+++ dovecot-cvs/configure.in	2004-08-25 19:37:47.000000000 +0400
@@ -265,10 +265,14 @@ AC_CHECK_FUNCS(fcntl flock lockf inet_at
 
 dnl * poll/select?
 
-AC_CHECK_FUNC(poll, [
-	AC_DEFINE(IOLOOP_POLL,, Implement I/O loop with poll())
-], [
-	AC_DEFINE(IOLOOP_SELECT,, Implement I/O loop with select())
+AC_CHECK_FUNC(epoll_create, [
+		AC_DEFINE(IOLOOP_EPOLL,, Implement I/O loop with Linux 2.6 epoll())
+	], [
+		AC_CHECK_FUNC(poll, [
+			AC_DEFINE(IOLOOP_POLL,, Implement I/O loop with poll())
+		], [
+			AC_DEFINE(IOLOOP_SELECT,, Implement I/O loop with select())
+		])
 ])
 
 dnl * dnotify?
diff -urpNX /usr/share/dontdiff dovecot-cvs.vanilla/src/lib/ioloop-epoll.c dovecot-cvs/src/lib/ioloop-epoll.c
--- dovecot-cvs.vanilla/src/lib/ioloop-epoll.c	1970-01-01 03:00:00.000000000 +0300
+++ dovecot-cvs/src/lib/ioloop-epoll.c	2004-08-29 16:59:22.000000000 +0400
@@ -0,0 +1,308 @@
+/*
+ * Linux epoll() based ioloop handler.
+ *
+ * Copyright (c) 2004 Andrey Panin <pazke at donpac.ru>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/* @UNSAFE: whole file */
+
+#include "lib.h"
+#include "ioloop-internal.h"
+
+#ifdef IOLOOP_EPOLL
+
+#include <sys/epoll.h>
+#include <unistd.h>
+
+/* Initial capacity of both the epoll_wait() result buffer and the
+   fd -> io_list index. */
+#define INITIAL_EPOLL_EVENTS	128
+/* Maximum number of struct io handlers that can share one fd. */
+#define EPOLL_IOS_PER_FD	2
+
+struct ioloop_handler_data {
+	/* epoll instance descriptor returned by epoll_create() */
+	int epfd;
+	/* events_size is the capacity of events[]; events_pos counts the
+	   currently registered ios and drives the growth of events[] */
+	int events_size, events_pos;
+	struct epoll_event *events;
+
+	/* fd_index[fd] is the list of ios registered for fd, or NULL if no
+	   list was created for that fd yet; idx_size is its capacity */
+	unsigned int idx_size;
+	struct io_list **fd_index;
+};
+
+struct io_list {
+	/* number of non-NULL entries in ios[]; the used slots are not
+	   necessarily the first ones */
+	int count;
+	struct io *ios[EPOLL_IOS_PER_FD];
+};
+
+/* Allocate the handler state for ioloop and create its epoll instance. */
+void io_loop_handler_init(struct ioloop *ioloop)
+{
+	struct ioloop_handler_data *data;
+
+	ioloop->handler_data = data =
+		p_new(ioloop->pool, struct ioloop_handler_data, 1);
+
+	data->events_pos = 0;
+	data->events_size = INITIAL_EPOLL_EVENTS;
+	data->events = p_new(ioloop->pool, struct epoll_event,
+			     data->events_size);
+
+	data->idx_size = INITIAL_EPOLL_EVENTS;
+	data->fd_index = p_new(ioloop->pool, struct io_list *, data->idx_size);
+
+	data->epfd = epoll_create(INITIAL_EPOLL_EVENTS);
+	if (data->epfd < 0) {
+		/* without an epoll instance every later epoll_ctl() and
+		   epoll_wait() call would fail, so stop right here */
+		i_panic("epoll_create() : %m");
+	}
+}
+
+/* Close the epoll instance and release everything this handler allocated. */
+void io_loop_handler_deinit(struct ioloop *ioloop)
+{
+	struct ioloop_handler_data *data = ioloop->handler_data;
+	unsigned int i;
+
+	close(data->epfd);
+
+	/* the per-fd lists are allocated lazily by io_loop_handle_add() and
+	   kept until now, so they have to be released here */
+	for (i = 0; i < data->idx_size; i++) {
+		if (data->fd_index[i] != NULL) {
+			p_free(ioloop->pool, data->fd_index[i]);
+		}
+	}
+
+	p_free(ioloop->pool, ioloop->handler_data->events);
+	p_free(ioloop->pool, ioloop->handler_data->fd_index);
+	p_free(ioloop->pool, ioloop->handler_data);
+}
+
+#define IO_EPOLL_INPUT	(EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP)
+#define IO_EPOLL_OUTPUT	(EPOLLOUT | EPOLLERR | EPOLLHUP)
+
+/* Return the epoll event mask that covers the conditions of every io
+   currently stored in list. */
+static int epoll_event_mask(struct io_list *list)
+{
+	int events = 0, i;
+	struct io *io;
+
+	/* scan every slot, not just the first list->count ones: iolist_del()
+	   can leave a NULL hole in front of a slot that is still in use */
+	for (i = 0; i < EPOLL_IOS_PER_FD; i++) {
+		io = list->ios[i];
+
+		if (!io)
+			continue;
+
+		if (io->condition & IO_READ)
+			events |= IO_EPOLL_INPUT;
+		if (io->condition & IO_WRITE)
+			events |= IO_EPOLL_OUTPUT;
+	}
+
+	return events;
+}
+
+/* Store io in the first free slot of list. The list must not be full. */
+static void iolist_add(struct io_list *list, struct io *io)
+{
+	int i;
+
+	i_assert(list->count < EPOLL_IOS_PER_FD);
+
+	for (i = 0; i < EPOLL_IOS_PER_FD; i++) {
+		if (!list->ios[i]) {
+			list->ios[i] = io;
+			list->count++;
+			return;
+		}
+	}
+}
+
+/* Remove io from list. Nothing is changed if io is not stored in it. */
+static void iolist_del(struct io_list *list, struct io *io)
+{
+	int i;
+
+	for (i = 0; i < EPOLL_IOS_PER_FD; i++) {
+		if (list->ios[i] == io) {
+			list->ios[i] = NULL;
+			list->count--;
+			return;
+		}
+	}
+}
+
+/* Start watching io->fd for the conditions requested by io. */
+void io_loop_handle_add(struct ioloop *ioloop, struct io *io)
+{
+	struct ioloop_handler_data *data = ioloop->handler_data;
+	int ret, op, fd = io->fd;
+	struct io_list *list;
+	struct epoll_event event;
+
+	i_assert(fd >= 0);
+
+	if ((unsigned int) fd >= data->idx_size) {
+		/* grow the fd -> iolist array; this has to be done before
+		   fd_index[fd] is read below */
+		unsigned int old_size = data->idx_size;
+
+		data->idx_size = nearest_power((unsigned int) fd+1);
+
+		i_assert(data->idx_size < (size_t)-1 / sizeof(*data->fd_index));
+
+		/* NOTE: the new entries are expected to read as NULL, i.e.
+		   p_realloc() is assumed to zero-fill the added memory -
+		   confirm against the pool implementation */
+		data->fd_index = p_realloc(ioloop->pool, data->fd_index,
+				sizeof(*data->fd_index) * old_size,
+				sizeof(*data->fd_index) * data->idx_size);
+	}
+
+	list = data->fd_index[fd];
+	if (!list) {
+		data->fd_index[fd] = list =
+			p_new(ioloop->pool, struct io_list, 1);
+	}
+
+	iolist_add(list, io);
+
+	event.data.ptr = list;
+	event.events = epoll_event_mask(list);
+
+	/* the first io of an fd registers the fd, later ones only update it */
+	op = (list->count == 1) ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
+
+	ret = epoll_ctl(data->epfd, op, fd, &event);
+	if (ret < 0)
+		i_warning("epoll_ctl() : %m");
+
+	/* the epoll_wait() result buffer is enlarged to match events_pos by
+	   io_loop_handler_run(), where it is safe to replace it */
+	data->events_pos++;
+}
+
+/* Stop watching io->fd for io. The fd is removed from the epoll set once
+   its last io is gone. */
+void io_loop_handle_remove(struct ioloop *ioloop, struct io *io)
+{
+	struct ioloop_handler_data *data = ioloop->handler_data;
+	struct io_list *list;
+	struct epoll_event event;
+	int ret, op;
+
+	i_assert(io->fd >= 0 && (unsigned int) io->fd < data->idx_size);
+
+	list = data->fd_index[io->fd];
+	i_assert(list);
+	i_assert(list->count > 0);
+
+	iolist_del(list, io);
+
+	event.data.ptr = list;
+	event.events = epoll_event_mask(list);
+
+	op = (list->count == 0) ? EPOLL_CTL_DEL : EPOLL_CTL_MOD;
+
+	ret = epoll_ctl(data->epfd, op, io->fd, &event);
+	/* EBADF is deliberately not reported (presumably the fd may already
+	   have been closed by the caller - confirm) */
+	if ((ret < 0) && (errno != EBADF))
+		i_warning("epoll_ctl() : %m");
+
+	data->events_pos--;
+}
+
+#define IO_RW	(IO_READ | IO_WRITE)
+
+/* Wait for I/O events until the next timeout is due, run the timeout
+   handlers and then call the callbacks of the ios whose fd got an event. */
+void io_loop_handler_run(struct ioloop *ioloop)
+{
+	struct ioloop_handler_data *data = ioloop->handler_data;
+	struct epoll_event *event;
+	struct io_list *list;
+	struct io *io;
+	struct timeval tv;
+	unsigned int t_id;
+	int msecs, ret, i, call;
+
+	if (data->events_pos > data->events_size) {
+		/* Make room for one event per registered io. This is done
+		   here rather than in io_loop_handle_add() because timeout
+		   handlers and io callbacks may add ios while the buffer
+		   filled by epoll_wait() is still being used below. */
+		data->events_size = nearest_power(data->events_pos);
+
+		p_free(ioloop->pool, data->events);
+		data->events = p_new(ioloop->pool, struct epoll_event,
+				     data->events_size);
+	}
+
+	/* get the time left for next timeout task */
+	msecs = io_loop_get_wait_time(ioloop->timeouts, &tv, NULL);
+
+	ret = epoll_wait(data->epfd, data->events, data->events_size, msecs);
+	if (ret < 0 && errno != EINTR)
+		i_warning("epoll_wait() : %m");
+
+	/* execute timeout handlers */
+	io_loop_handle_timeouts(ioloop);
+
+	if (ret <= 0 || !ioloop->running) {
+		/* No events */
+		return;
+	}
+
+	event = data->events;
+	while (ret--) {
+		list = event->data.ptr;
+
+		/* Walk every slot and skip the empty ones: the used slots
+		   need not be the first list->count entries, and an earlier
+		   callback may have removed an io of this fd. */
+		for (i = 0; i < EPOLL_IOS_PER_FD; i++) {
+			io = list->ios[i];
+			if (!io)
+				continue;
+
+			call = FALSE;
+			if (event->events & (EPOLLHUP | EPOLLERR)) {
+				/* FIXME: is this Right Thing to do ? */
+				call = TRUE;
+			} else if ((io->condition & IO_RW) == IO_RW) {
+				call = TRUE;
+			} else if (io->condition & IO_READ) {
+				call = event->events & EPOLLIN;
+			} else if (io->condition & IO_WRITE) {
+				call = event->events & EPOLLOUT;
+			}
+
+			if (call) {
+				t_id = t_push();
+				io->callback(io->context);
+				if (t_pop() != t_id)
+					i_panic("Leaked a t_pop() call!");
+			}
+		}
+		event++;
+	}
+}
+
+#endif	/* IOLOOP_EPOLL */
diff -urpNX /usr/share/dontdiff dovecot-cvs.vanilla/src/lib/Makefile.am dovecot-cvs/src/lib/Makefile.am
--- dovecot-cvs.vanilla/src/lib/Makefile.am	2004-08-25 11:11:12.000000000 +0400
+++ dovecot-cvs/src/lib/Makefile.am	2004-08-25 19:35:19.000000000 +0400
@@ -30,6 +30,7 @@ liblib_a_SOURCES = \
 	ioloop-notify-dn.c \
 	ioloop-poll.c \
 	ioloop-select.c \
+	ioloop-epoll.c \
 	lib.c \
 	lib-signals.c \
 	md4.c \
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://dovecot.org/pipermail/dovecot/attachments/20040830/2f1a85a4/attachment-0001.bin>


More information about the dovecot mailing list