epoll patch - status?

Mike McCormack mike at codeweavers.com
Thu Sep 9 02:04:24 CDT 2004


Shachar Shemesh wrote:
>> * used direct syscalls as my libc doesn't have epoll_* functions
> 
> 
> Which Alexandre already asked not to. In any case, if you do, you have 
> to add a copyright notice to the authors of the header file who's 
> content you are copying.

OK, fixed.

>> * fixed array of epoll_event structures for starters
> 
> 
> I missed that. What did you mean by this?

I mean I've allocated an array of 10 struct epoll_events to receive 
events from epoll, and I plan to dynamically allocate it later.

>> * do_epoll_remove assert commented as it occasionally triggers
> 
> 
> Was that assert in my code? By looking at it, I see no reason for it NOT 
> to be triggered occasionally. Don't forget that the file descriptor may 
> have been closed prior to being removed from the list, in which case the 
> system removed it from the epoll fd itself.

I've tried to make it so that I only add FDs that are known not to be 
epoll'ed already, and remove FDs that are.  To verify that is correct, I 
am using asserts.

> If the file descriptor is already in there, EPOLL_CTL_ADD will fail. You 
> will then have to call EPOLL_CTL_MOD.

The patch from the previous mail always removed the FD before adding it 
instead of trying to modify the poll flags.

> I'm assuming you pulled the timeout code into a separate function here. 
> If there is anything fd related here, please let me know.

Correct.

> If you really have to, make that:
> #if POLLIN!=EPOLLIN || ....

Fixed in a different way.

> Read the comment in the appropriate section of my code. You have a race 
> here that will crash you occasionally.

Fixed.

>> +void main_loop(void)
>> +{
>> +#ifdef USE_EPOLL
>> +    if( epoll_fd >= 0 )
>> +    {
>> +        fprintf(stderr,"epoll enabled\n");
>> +        epoll_loop();
>> +        close( epoll_fd );
>> +        return;
>> +    }
>> +#endif
>> +    select_loop();
>>  
>>
> You are calling both epoll AND poll here.

No, there's a return in there, so only one of the main loops is entered.

> Like I said earlier, EPOLL_CTL_MOD does the same thing.

The new patch uses EPOLL_CTL_MOD.

> Well, the most serious problem with this code is the race you did not 
> handle. I don't like the use of a whole pollfd array, but that's not for 
> me to decide.

OK, I've fixed the race as I mentioned above.  I'm trying to use exactly 
the same data structures as the select() loop where possible so that 
it's easier to verify the epoll_loop works assuming that the 
select_loop() works.

Looking forward to comments on the attached patch.

Mike


Note:  libepoll is available here:
http://www.xmailserver.org/linux-patches/nio-improve.html
-------------- next part --------------
Index: server/fd.c
===================================================================
RCS file: /home/wine/wine/server/fd.c,v
retrieving revision 1.25
diff -u -r1.25 fd.c
--- server/fd.c	9 Sep 2004 00:28:36 -0000	1.25
+++ server/fd.c	9 Sep 2004 04:51:16 -0000
@@ -2,6 +2,8 @@
  * Server-side file descriptor management
  *
  * Copyright (C) 2000, 2003 Alexandre Julliard
+ * Copyright (C) 2004 Shachar Shemesh for Lingnu Open Source Consulting ltd.
+ * Copyright (C) 2004 Mike McCormack
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -33,6 +35,10 @@
 #ifdef HAVE_SYS_POLL_H
 #include <sys/poll.h>
 #endif
+#include <stdint.h>
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+#endif
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -236,6 +242,106 @@
 static int allocated_users;                 /* count of allocated entries in the array */
 static struct fd **freelist;                /* list of free entries in the array */
 
+#ifdef HAVE_SYS_EPOLL_H
+
+#define EPOLL_SIZE 1024
+#define MAX_EVENTS 10
+
+static int epoll_fd;
+struct epoll_event *epoll_ev_first, *epoll_ev_last;
+
+/* this should optimize away if the poll constants are the same as the epoll ones */
+static inline int epoll_events( int events )
+{
+    int r = 0;
+
+    if( (POLLIN == EPOLLIN) && (POLLOUT == EPOLLOUT) &&
+        (POLLERR == EPOLLERR) && (POLLHUP == EPOLLHUP) )
+        return events;
+    if( POLLIN&events ) r |= EPOLLIN;
+    if( POLLOUT&events ) r |= EPOLLOUT;
+    if( POLLERR&events ) r |= EPOLLERR;
+    if( POLLHUP&events ) r |= EPOLLHUP;
+    return r;
+}
+
+static inline void do_epoll_add( int fd, int events, unsigned int user )
+{
+    struct epoll_event eev;
+    int r;
+
+    if (epoll_fd < 0)
+        return;
+    if (fd < 0)
+        return;
+    if (!events)
+        return;
+    eev.events = events;
+    eev.data.u32 = user;
+    r = epoll_ctl( epoll_fd, EPOLL_CTL_ADD, fd, &eev );
+    assert( 0 == r );
+}
+
+static inline void do_epoll_mod( int fd, int events, unsigned int user )
+{
+    struct epoll_event eev;
+    int r;
+
+    if (epoll_fd < 0)
+        return;
+    if (fd < 0)
+        return;
+    eev.events = events;
+    eev.data.u32 = user;
+    r = epoll_ctl( epoll_fd, EPOLL_CTL_MOD, fd, &eev );
+    assert( 0 == r );
+}
+
+static inline void do_epoll_remove( int fd, int old_events, unsigned int user )
+{
+    struct epoll_event eev, *i;
+    int r;
+
+    if (epoll_fd < 0)
+        return;
+    if (fd < 0)
+        return;
+    if (!old_events)
+        return;
+    r = epoll_ctl( epoll_fd, EPOLL_CTL_DEL, fd, &eev );
+    assert( 0 == r );
+
+    /* remove any unprocessed events for this fd */
+    for( i=epoll_ev_first; i<epoll_ev_last; i++ )
+        if (i->data.u32 == user)
+            i->events = 0;
+}
+
+void init_fd()
+{
+    epoll_fd = epoll_create(EPOLL_SIZE);
+}
+
+#else
+
+static inline void do_epoll_add( int fd, int events, unsigned int user )
+{
+}
+
+static inline void do_epoll_mod( int fd, int events, unsigned int user )
+{
+}
+
+static inline void do_epoll_remove( int fd, int old_events, unsigned int user )
+{
+}
+
+void init_fd()
+{
+}
+
+#endif
+
 /* add a user in the poll array and return its index, or -1 on failure */
 static int add_poll_user( struct fd *fd )
 {
@@ -280,6 +386,7 @@
 {
     assert( user >= 0 );
     assert( poll_users[user] == fd );
+    do_epoll_remove( pollfd[user].fd, pollfd[user].events, user );
     pollfd[user].fd = -1;
     pollfd[user].events = 0;
     pollfd[user].revents = 0;
@@ -288,56 +395,95 @@
     active_users--;
 }
 
-
-/* server main poll() loop */
-void main_loop(void)
+static inline int next_delta()
 {
-    int ret;
+    long diff = -1;
+    struct list expired_list, *ptr;
+    struct timeval now;
 
-    while (active_users)
+    if (list_empty(&timeout_list))
+        return diff;
+
+    gettimeofday( &now, NULL );
+
+    /* first remove all expired timers from the list */
+
+    list_init( &expired_list );
+    while ((ptr = list_head( &timeout_list )) != NULL)
     {
-        long diff = -1;
-        if (!list_empty( &timeout_list ))
+        struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
+
+        if (!time_before( &now, &timeout->when ))
         {
-            struct list expired_list, *ptr;
-            struct timeval now;
-            gettimeofday( &now, NULL );
+            list_remove( &timeout->entry );
+            list_add_tail( &expired_list, &timeout->entry );
+        }
+        else break;
+    }
 
-            /* first remove all expired timers from the list */
+    /* now call the callback for all the removed timers */
 
-            list_init( &expired_list );
-            while ((ptr = list_head( &timeout_list )) != NULL)
-            {
-                struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
+    while ((ptr = list_head( &expired_list )) != NULL)
+    {
+        struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
+        list_remove( &timeout->entry );
+        timeout->callback( timeout->private );
+        free( timeout );
+    }
 
-                if (!time_before( &now, &timeout->when ))
-                {
-                    list_remove( &timeout->entry );
-                    list_add_tail( &expired_list, &timeout->entry );
-                }
-                else break;
-            }
+    if (!active_users) return diff;  /* last user removed by a timeout */
 
-            /* now call the callback for all the removed timers */
+    if ((ptr = list_head( &timeout_list )) != NULL)
+    {
+        struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
+        diff = (timeout->when.tv_sec - now.tv_sec) * 1000
+             + (timeout->when.tv_usec - now.tv_usec) / 1000;
+        if (diff < 0) diff = 0;
+    }
 
-            while ((ptr = list_head( &expired_list )) != NULL)
-            {
-                struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
-                list_remove( &timeout->entry );
-                timeout->callback( timeout->private );
-                free( timeout );
-            }
+    return diff;
+}
 
-            if (!active_users) break;  /* last user removed by a timeout */
+#ifdef HAVE_SYS_EPOLL_H
 
-            if ((ptr = list_head( &timeout_list )) != NULL)
+/* server main poll() loop using epoll */
+static void epoll_loop(void)
+{
+    struct epoll_event ev[MAX_EVENTS];
+
+    while (active_users)
+    {
+        int ret, i, user, diff = next_delta();
+        if (!active_users) break;  /* last user removed by a timeout */
+
+        ret = epoll_wait( epoll_fd, &ev[0], MAX_EVENTS, diff );
+        for (i = 0; i < ret; i++)
+        {
+            user = ev[i].data.u32;
+            assert( user < nb_users );
+            epoll_ev_last = &ev[ret];
+            if (ev[i].events)
             {
-                struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
-                diff = (timeout->when.tv_sec - now.tv_sec) * 1000
-                     + (timeout->when.tv_usec - now.tv_usec) / 1000;
-                if (diff < 0) diff = 0;
+                epoll_ev_first = &ev[i+1];
+                pollfd[user].revents = ev[i].events;
+                fd_poll_event( poll_users[user], ev[i].events );
             }
         }
+        epoll_ev_first = NULL;
+        epoll_ev_last = NULL;
+    }
+}
+#endif
+
+/* server main poll() loop using select */
+static void select_loop(void)
+{
+    int ret;
+
+    while (active_users)
+    {
+        int diff = next_delta();
+        if (!active_users) break;  /* last user removed by a timeout */
 
         ret = poll( pollfd, nb_users, diff );
         if (ret > 0)
@@ -355,6 +501,18 @@
     }
 }
 
+void main_loop(void)
+{
+#ifdef HAVE_SYS_EPOLL_H
+    if( epoll_fd >= 0 )
+    {
+        epoll_loop();
+        close( epoll_fd );
+        return;
+    }
+#endif
+    select_loop();
+}
 
 /****************************************************************/
 /* inode functions */
@@ -837,12 +995,24 @@
     assert( poll_users[user] == fd );
     if (events == -1)  /* stop waiting on this fd completely */
     {
+        do_epoll_remove( pollfd[user].fd, pollfd[user].events, user );
         pollfd[user].fd = -1;
         pollfd[user].events = POLLERR;
         pollfd[user].revents = 0;
     }
     else if (pollfd[user].fd != -1 || !pollfd[user].events)
     {
+        /* remove the old fd */
+        if ( (fd->unix_fd != pollfd[user].fd) || !events )
+            do_epoll_remove( pollfd[user].fd, pollfd[user].events, user );
+        if (events)
+        {
+            /* add the new fd or update the old one */
+            if ( (pollfd[user].fd == -1) || !pollfd[user].events )
+                do_epoll_add( fd->unix_fd, events, user );
+            else if (pollfd[user].events != events)
+                do_epoll_mod( fd->unix_fd, events, user );
+        }
         pollfd[user].fd = fd->unix_fd;
         pollfd[user].events = events;
     }
Index: server/main.c
===================================================================
RCS file: /home/wine/wine/server/main.c,v
retrieving revision 1.31
diff -u -r1.31 main.c
--- server/main.c	26 Mar 2003 01:32:18 -0000	1.31
+++ server/main.c	9 Sep 2004 04:51:17 -0000
@@ -110,6 +110,7 @@
     exit(1);  /* make sure atexit functions get called */
 }
 
+void init_fd();
 int main( int argc, char *argv[] )
 {
     parse_args( argc, argv );
@@ -122,6 +123,7 @@
     signal( SIGTERM, sigterm_handler );
     signal( SIGABRT, sigterm_handler );
 
+    init_fd();
     sock_init();
     open_master_socket();
     sync_namespace = create_namespace( 37, TRUE );
Index: server/Makefile.in
===================================================================
RCS file: /home/wine/wine/server/Makefile.in,v
retrieving revision 1.51
diff -u -r1.51 Makefile.in
--- server/Makefile.in	23 Jun 2004 20:44:58 -0000	1.51
+++ server/Makefile.in	9 Sep 2004 04:51:17 -0000
@@ -5,6 +5,8 @@
 VPATH     = @srcdir@
 MODULE    = none
 
+LIBEPOLL  = @LIBEPOLL@
+
 C_SRCS = \
 	async.c \
 	atom.c \
@@ -52,7 +54,7 @@
 @MAKE_RULES@
 
 wineserver: $(OBJS)
-	$(CC) -o $(PROGRAMS) $(OBJS) $(LIBWINE) $(LIBUNICODE) $(LIBPORT) $(LDFLAGS) $(LIBS)
+	$(CC) -o $(PROGRAMS) $(OBJS) $(LIBWINE) $(LIBUNICODE) $(LIBPORT) $(LDFLAGS) $(LIBS) $(LIBEPOLL)
 
 install:: $(PROGRAMS)
 	$(MKINSTALLDIRS) $(bindir)
Index: configure.ac
===================================================================
RCS file: /home/wine/wine/configure.ac,v
retrieving revision 1.307
diff -u -r1.307 configure.ac
--- configure.ac	7 Sep 2004 22:44:34 -0000	1.307
+++ configure.ac	9 Sep 2004 04:51:17 -0000
@@ -132,6 +132,8 @@
 AC_CHECK_LIB(xpg4,_xpg4_setrunelocale)
 dnl Check for -lpoll for Mac OS X/Darwin
 AC_CHECK_LIB(poll,poll)
+dnl Check for -lepoll
+AC_CHECK_LIB(epoll,epoll_create,AC_SUBST(LIBEPOLL,-lepoll))
 dnl Check for -lresolv for Mac OS X/Darwin
 AC_CHECK_LIB(resolv,res_9_init)
 dnl Check for -lpthread
@@ -757,6 +759,7 @@
     EXTRACFLAGS="$EXTRACFLAGS -Wpointer-arith"
   fi
 fi
+EXTRACFLAGS="$EXTRACFLAGS -Wsign-compare"
 
 dnl **** Check how to define a function in assembly code ****
 
@@ -1046,6 +1049,7 @@
 	_vsnprintf \
 	chsize \
 	clone \
+	epoll_create \
 	finite \
 	fpclass \
 	fstatfs \
@@ -1159,6 +1163,7 @@
 	sys/msg.h \
 	sys/param.h \
 	sys/poll.h \
+	sys/epoll.h \
 	sys/ptrace.h \
 	sys/reg.h \
 	sys/scsiio.h \
@@ -1629,6 +1634,7 @@
 dlls/opengl32/Makefile
 dlls/psapi/Makefile
 dlls/psapi/tests/Makefile
+dlls/pstorec/Makefile
 dlls/qcap/Makefile
 dlls/quartz/Makefile
 dlls/quartz/tests/Makefile
Index: include/config.h.in
===================================================================
RCS file: /home/wine/wine/include/config.h.in,v
retrieving revision 1.195
diff -u -r1.195 config.h.in
--- include/config.h.in	7 Sep 2004 22:44:34 -0000	1.195
+++ include/config.h.in	9 Sep 2004 04:51:17 -0000
@@ -80,6 +80,9 @@
 /* Define to 1 if you have the <elf.h> header file. */
 #undef HAVE_ELF_H
 
+/* Define to 1 if you have the `epoll_create' function. */
+#undef HAVE_EPOLL_CREATE
+
 /* Define to 1 if you have the `finite' function. */
 #undef HAVE_FINITE
 
@@ -248,6 +251,9 @@
 /* Define if you have the curses library (-lcurses) */
 #undef HAVE_LIBCURSES
 
+/* Define to 1 if you have the `epoll' library (-lepoll). */
+#undef HAVE_LIBEPOLL
+
 /* Define to 1 if you have the `i386' library (-li386). */
 #undef HAVE_LIBI386
 
@@ -613,6 +619,9 @@
 
 /* Define to 1 if you have the <sys/elf32.h> header file. */
 #undef HAVE_SYS_ELF32_H
+
+/* Define to 1 if you have the <sys/epoll.h> header file. */
+#undef HAVE_SYS_EPOLL_H
 
 /* Define to 1 if you have the <sys/errno.h> header file. */
 #undef HAVE_SYS_ERRNO_H


More information about the wine-devel mailing list