/* * IRC - Internet Relay Chat, ircd/engine_epoll.c * Copyright (C) 2003 Michael Poole * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 1, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /** @file * @brief Linux epoll_*() event engine. * @version $Id: engine_epoll.c 1933 2010-01-04 03:24:36Z entrope $ */ #include "config.h" #include "ircd.h" #include "ircd_events.h" #include "ircd_alloc.h" #include "ircd_features.h" #include "ircd_log.h" #include "s_debug.h" /* #include -- Now using assert in ircd_log.h */ #include #include #ifdef HAVE_STDINT_H #include /* bah */ #endif #include #include #include #include #include /* The GNU C library may have a valid header but stub implementations * of the epoll system calls. If so, provide our own. */ #if defined(__stub_epoll_create) || defined(__stub___epoll_create) || defined(EPOLL_NEED_BODY) /* Oh, did we mention that some glibc releases do not even define the * syscall numbers? 
*/
/* Fallback system call numbers for the epoll family, chosen by CPU
 * architecture.  Only consulted when the system headers did not already
 * define __NR_epoll_create.
 */
#if !defined(__NR_epoll_create)
#if defined(__ia64__)
#define __NR_epoll_create 1243
#define __NR_epoll_ctl 1244
#define __NR_epoll_wait 1245
#elif defined(__x86_64__)
#define __NR_epoll_create 214
#define __NR_epoll_ctl 233
#define __NR_epoll_wait 232
#elif defined(__sparc64__) || defined(__sparc__)
#define __NR_epoll_create 193
#define __NR_epoll_ctl 194
#define __NR_epoll_wait 195
#elif defined(__s390__) || defined(__m68k__)
#define __NR_epoll_create 249
#define __NR_epoll_ctl 250
#define __NR_epoll_wait 251
#elif defined(__ppc64__) || defined(__ppc__)
#define __NR_epoll_create 236
#define __NR_epoll_ctl 237
#define __NR_epoll_wait 238
#elif defined(__parisc__) || defined(__arm26__) || defined(__arm__)
#define __NR_epoll_create 224
#define __NR_epoll_ctl 225
#define __NR_epoll_wait 226
#elif defined(__alpha__)
#define __NR_epoll_create 407
#define __NR_epoll_ctl 408
#define __NR_epoll_wait 409
#elif defined(__sh64__)
#define __NR_epoll_create 282
#define __NR_epoll_ctl 283
#define __NR_epoll_wait 284
#elif defined(__i386__) || defined(__sh__) || defined(__m32r__) || defined(__h8300__) || defined(__frv__)
#define __NR_epoll_create 254
#define __NR_epoll_ctl 255
#define __NR_epoll_wait 256
#else /* cpu types */
#error No system call numbers defined for epoll family.
#endif /* cpu types */
#endif /* !defined(__NR_epoll_create) */

/* Our own bodies for the three epoll entry points, replacing the C
 * library's stubs.
 * NOTE(review): _syscall1/_syscall4 are presumably the legacy
 * kernel-header wrapper generators -- confirm they are still provided
 * on every platform that reaches this branch.
 */
_syscall1(int, epoll_create, int, size)
_syscall4(int, epoll_ctl, int, epfd, int, op, int, fd,
          struct epoll_event *, event)
_syscall4(int, epoll_wait, int, epfd, struct epoll_event *, pevents,
          int, maxevents, int, timeout)

#endif /* epoll_create defined as stub */

#define EPOLL_ERROR_THRESHOLD 20 /**< restart once more than this many epoll errors are outstanding */
#define ERROR_EXPIRE_TIME 3600 /**< interval in seconds (one hour) of the timer that forgets errors */

/** File descriptor for epoll pseudo-file; set by engine_init(). */
static int epoll_fd;
/** Number of recent epoll errors (incremented on each epoll_wait()
 * failure other than EINTR, decremented by error_clear()).
 */
static int errors;
/** Periodic timer to forget errors. */
static struct Timer clear_error;
/** Current array of event descriptors.
*/ static struct epoll_event *events; /** Number of ::events elements that have been populated. */ static int events_used; /** Decrement the error count (once per hour). * @param[in] ev Expired timer event (ignored). */ static void error_clear(struct Event *ev) { if (!--errors) timer_del(ev_timer(ev)); } /** Initialize the epoll engine. * @param[in] max_sockets Maximum number of file descriptors to support. * @return Non-zero on success, or zero on failure. */ static int engine_init(int max_sockets) { if ((epoll_fd = epoll_create(max_sockets)) < 0) { log_write(LS_SYSTEM, L_WARNING, 0, "epoll() engine cannot initialize: %m"); return 0; } return 1; } /** Set events for a particular socket. * @param[in] sock Socket to calculate events for. * @param[in] state Current socket state. * @param[in] events User-specified event interest list. * @param[out] evt epoll event structure for socket. */ static void set_events(struct Socket *sock, enum SocketState state, unsigned int events, struct epoll_event *evt) { assert(0 != sock); assert(0 <= s_fd(sock)); memset(evt, 0, sizeof(*evt)); evt->data.ptr = sock; switch (state) { case SS_CONNECTING: evt->events = EPOLLOUT; break; case SS_LISTENING: case SS_NOTSOCK: evt->events = EPOLLIN; break; case SS_CONNECTED: case SS_DATAGRAM: case SS_CONNECTDG: switch (events & SOCK_EVENT_MASK) { case 0: evt->events = 0; break; case SOCK_EVENT_READABLE: evt->events = EPOLLIN; break; case SOCK_EVENT_WRITABLE: evt->events = EPOLLOUT; break; case SOCK_EVENT_READABLE|SOCK_EVENT_WRITABLE: evt->events = EPOLLIN|EPOLLOUT; break; } break; } } /** Add a socket to the event engine. * @param[in] sock Socket to add to engine. * @return Non-zero on success, or zero on error. 
*/ static int engine_add(struct Socket *sock) { struct epoll_event evt; assert(0 != sock); Debug((DEBUG_ENGINE, "epoll: Adding socket %d [%p], state %s, to engine", s_fd(sock), sock, state_to_name(s_state(sock)))); set_events(sock, s_state(sock), s_events(sock), &evt); if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, s_fd(sock), &evt) < 0) { event_generate(ET_ERROR, sock, errno); return 0; } return 1; } /** Handle state transition for a socket. * @param[in] sock Socket changing state. * @param[in] new_state New state for socket. */ static void engine_set_state(struct Socket *sock, enum SocketState new_state) { struct epoll_event evt; assert(0 != sock); Debug((DEBUG_ENGINE, "epoll: Changing state for socket %p to %s", sock, state_to_name(new_state))); set_events(sock, new_state, s_events(sock), &evt); if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, s_fd(sock), &evt) < 0) event_generate(ET_ERROR, sock, errno); } /** Handle change to preferred socket events. * @param[in] sock Socket getting new interest list. * @param[in] new_events New set of interesting events for socket. */ static void engine_set_events(struct Socket *sock, unsigned new_events) { struct epoll_event evt; assert(0 != sock); Debug((DEBUG_ENGINE, "epoll: Changing event mask for socket %p to [%s]", sock, sock_flags(new_events))); set_events(sock, s_state(sock), new_events, &evt); if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, s_fd(sock), &evt) < 0) event_generate(ET_ERROR, sock, errno); } /** Remove a socket from the event engine. * @param[in] sock Socket being destroyed. */ static void engine_delete(struct Socket *sock) { int ii; assert(0 != sock); Debug((DEBUG_ENGINE, "epoll: Deleting socket %d [%p], state %s", s_fd(sock), sock, state_to_name(s_state(sock)))); /* Drop any unprocessed events citing this socket. */ for (ii = 0; ii < events_used; ii++) { if (events[ii].data.ptr == sock) { events[ii] = events[--events_used]; } } } /** Run engine event loop. * @param[in] gen Lists of generators of various types. 
*/ static void engine_loop(struct Generators *gen) { struct epoll_event *evt; struct Socket *sock; socklen_t codesize; int events_count, tmp, wait, errcode; if ((events_count = feature_int(FEAT_POLLS_PER_LOOP)) < 20) events_count = 20; events = MyMalloc(sizeof(events[0]) * events_count); while (running) { if ((tmp = feature_int(FEAT_POLLS_PER_LOOP)) >= 20 && tmp != events_count) { events = MyRealloc(events, sizeof(events[0]) * tmp); events_count = tmp; } wait = timer_next(gen) ? (timer_next(gen) - CurrentTime) * 1000 : -1; Debug((DEBUG_ENGINE, "epoll: delay: %d (%d) %d", timer_next(gen), CurrentTime, wait)); events_used = epoll_wait(epoll_fd, events, events_count, wait); CurrentTime = time(0); if (events_used < 0) { if (errno != EINTR) { log_write(LS_SOCKET, L_ERROR, 0, "epoll() error: %m"); if (!errors++) timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC, ERROR_EXPIRE_TIME); else if (errors > EPOLL_ERROR_THRESHOLD) server_restart("too many epoll errors"); } continue; } while (events_used > 0) { evt = &events[--events_used]; if (!(sock = evt->data.ptr)) continue; gen_ref_inc(sock); Debug((DEBUG_ENGINE, "epoll: Checking socket %p (fd %d) state %s, events %s", sock, s_fd(sock), state_to_name(s_state(sock)), sock_flags(s_events(sock)))); if (evt->events & EPOLLERR) { errcode = 0; codesize = sizeof(errcode); if (getsockopt(s_fd(sock), SOL_SOCKET, SO_ERROR, &errcode, &codesize) < 0) errcode = errno; if (errcode) { event_generate(ET_ERROR, sock, errcode); gen_ref_dec(sock); continue; } } else if (evt->events & EPOLLHUP) { event_generate(ET_EOF, sock, 0); } else switch (s_state(sock)) { case SS_CONNECTING: if (evt->events & EPOLLOUT) /* connection completed */ event_generate(ET_CONNECT, sock, 0); break; case SS_LISTENING: if (evt->events & EPOLLIN) /* incoming connection */ event_generate(ET_ACCEPT, sock, 0); break; case SS_NOTSOCK: case SS_CONNECTED: case SS_DATAGRAM: case SS_CONNECTDG: if (evt->events & EPOLLIN) event_generate(ET_READ, sock, 0); if 
(evt->events & EPOLLOUT) event_generate(ET_WRITE, sock, 0); break; } gen_ref_dec(sock); } timer_run(); } MyFree(events); } /** Descriptor for epoll event engine. */ struct Engine engine_epoll = { "epoll()", engine_init, 0, engine_add, engine_set_state, engine_set_events, engine_delete, engine_loop };