[1/2] testbot/testagentd: Add a RemoveChildProc RPC.

Francois Gouget fgouget at codeweavers.com
Mon Jul 21 16:24:00 CDT 2014


The Wait RPC no longer causes the server to forget about the child process. That reponsibility is transferred to the RemoveChildProc RPC. This is to solve the case where the network connection breaks at just the time when the child process terminates which could cause the server to forget the child process exit status despite it not having reached the client process.
---

This avoids a race condition that occurs when the child process exits so 
that the server retrieves its exit code and sends it to the client, but 
the network connection breaks before the client receives this data. In 
such a case the server would still forget the child process exit code so 
that when the client reconnects and waits for it again it would get a 
'process does not exist or is not a child process' error.

The new code puts the client in charge of determining when it no longer 
needs the server to keep track of a given client process.

Note that the TestBot only needs the Wait RPC to not remove child 
processes. So the TestBot does not actually bother using use the new RPC 
because the testagentd server gets wiped clean by the VM revert as soon 
as the child process has exited.

 testbot/lib/WineTestBot/TestAgent.pm      | 26 ++++++++++++++++++++++++++
 testbot/scripts/TestAgent                 | 12 ++++++++++--
 testbot/src/testagentd/platform.h         |  8 ++++++++
 testbot/src/testagentd/platform_unix.c    | 17 +++++++++++++++++
 testbot/src/testagentd/platform_windows.c | 27 ++++++++++++++++++++++++---
 testbot/src/testagentd/testagentd.c       | 25 ++++++++++++++++++++++++-
 6 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/testbot/lib/WineTestBot/TestAgent.pm b/testbot/lib/WineTestBot/TestAgent.pm
index 8b5a4c9..2aa8373 100644
--- a/testbot/lib/WineTestBot/TestAgent.pm
+++ b/testbot/lib/WineTestBot/TestAgent.pm
@@ -39,6 +39,7 @@ my $RPC_WAIT2 = 6;
 my $RPC_SETTIME = 7;
 my $RPC_GETPROPERTIES = 8;
 my $RPC_UPGRADE = 9;
+my $RPC_RMCHILDPROC = 10;
 
 my %RpcNames=(
     $RPC_PING => 'ping',
@@ -51,6 +52,7 @@ my %RpcNames=(
     $RPC_SETTIME => 'settime',
     $RPC_GETPROPERTIES => 'getproperties',
     $RPC_UPGRADE => 'upgrade',
+    $RPC_RMCHILDPROC => 'rmchildproc',
 );
 
 my $Debug = 0;
@@ -1374,4 +1376,28 @@ sub Upgrade($$)
   return $rc;
 }
 
+sub RemoveChildProcess($$)
+{
+  my ($self, $Pid) = @_;
+  debug("RmChildProcess $Pid\n");
+
+  # Make sure we have the server version
+  return undef if (!$self->{agentversion} and !$self->_Connect());
+
+  # Up to 1.5 a seemingly successful Wait RPC automatically removes child
+  # processes.
+  return 1 if ($self->{agentversion} =~ / 1\.[0-5]$/);
+
+  # Send the command
+  if (!$self->_StartRPC($RPC_RMCHILDPROC) or
+      !$self->_SendListSize('ArgC', 1) or
+      !$self->_SendUInt64('Pid', $Pid))
+  {
+      return undef;
+  }
+
+  # Get the reply
+  return $self->_RecvList('');
+}
+
 1;
diff --git a/testbot/scripts/TestAgent b/testbot/scripts/TestAgent
index ac2c914..ec4504f 100755
--- a/testbot/scripts/TestAgent
+++ b/testbot/scripts/TestAgent
@@ -376,14 +376,22 @@ elsif ($Cmd eq "run")
         if (!($RunFlags & $TestAgent::RUN_DNT))
         {
             $Result = $TA->Wait($Pid, $Timeout, $Keepalive);
-            print "Child exit status: $Result\n" if (defined $Result);
+            if (defined $Result)
+            {
+                print "Child exit status: $Result\n";
+                $TA->RemoveChildProcess($Pid);
+            }
         }
     }
 }
 elsif ($Cmd eq "wait")
 {
     $Result = $TA->Wait($WaitPid, $Timeout, $Keepalive);
-    print "Child exit status: $Result\n" if (defined $Result);
+    if (defined $Result)
+    {
+        print "Child exit status: $Result\n";
+        $TA->RemoveChildProcess($WaitPid);
+    }
 }
 elsif ($Cmd eq "rm")
 {
diff --git a/testbot/src/testagentd/platform.h b/testbot/src/testagentd/platform.h
index c6c0f2e..46e4d37 100644
--- a/testbot/src/testagentd/platform.h
+++ b/testbot/src/testagentd/platform.h
@@ -74,11 +74,19 @@ uint64_t platform_run(char** argv, uint32_t flags, char** redirects);
 /* If a command was started in the background, waits until either that command
  * terminates, the specified timeout (in seconds) expires, or the client
  * disconnects (typically because it got tired of waiting).
+ * Note that this does not cause the child process to be forgotten, even if it
+ * did exit. This is so that the client can retrieve the information again if
+ * needed (e.g. in case it did not receive it due to a network issue).
  * If no command was started in the background, then reports an error
  * immediately.
  */
 int platform_wait(SOCKET client, uint64_t pid, uint32_t timeout, uint32_t *childstatus);
 
+/* Causes the given child process to be forgotten, which means it will no longer
+ * be possible to wait for it or retrieve its exit status.
+ */
+int platform_rmchildproc(SOCKET client, uint64_t pid);
+
 /* Sets the system time to the specified Unix epoch. If the system time is
  * already within leeway seconds of the specified time, then consider that
  * the system clock is already correct.
diff --git a/testbot/src/testagentd/platform_unix.c b/testbot/src/testagentd/platform_unix.c
index 75afc81..b27c016 100644
--- a/testbot/src/testagentd/platform_unix.c
+++ b/testbot/src/testagentd/platform_unix.c
@@ -194,6 +194,23 @@ int platform_wait(SOCKET client, uint64_t pid, uint32_t timeout, uint32_t *child
     }
     debug("process " U64FMT " returned status %u\n", pid, child->status);
     *childstatus = child->status;
+    return 1;
+}
+
+int platform_rmchildproc(SOCKET client, uint64_t pid)
+{
+    struct child_t *child;
+
+    LIST_FOR_EACH_ENTRY(child, &children, struct child_t, entry)
+    {
+        if (child->pid == pid)
+            break;
+    }
+    if (!child || child->pid != pid)
+    {
+        set_status(ST_ERROR, "the " U64FMT " process does not exist or is not a child process", pid);
+        return 0;
+    }
     list_remove(&child->entry);
     free(child);
     return 1;
diff --git a/testbot/src/testagentd/platform_windows.c b/testbot/src/testagentd/platform_windows.c
index dd8da8e..8e00f09 100644
--- a/testbot/src/testagentd/platform_windows.c
+++ b/testbot/src/testagentd/platform_windows.c
@@ -204,9 +204,9 @@ int platform_wait(SOCKET client, uint64_t pid, uint32_t timeout, uint32_t *child
         debug("WaitForMultipleObjects() returned %lu (le=%lu). Giving up!\n", r, GetLastError());
         break;
     }
-    CloseHandle(child->handle);
-    list_remove(&child->entry);
-    free(child);
+    /* Don't close child->handle so we can retrieve the exit status again if
+     * needed.
+     */
 
  cleanup:
     /* We must reset WSAEventSelect before we can make
@@ -221,6 +221,27 @@ int platform_wait(SOCKET client, uint64_t pid, uint32_t timeout, uint32_t *child
     return success;
 }
 
+int platform_rmchildproc(SOCKET client, uint64_t pid)
+{
+    struct child_t *child;
+
+    LIST_FOR_EACH_ENTRY(child, &children, struct child_t, entry)
+    {
+        if (child->pid == pid)
+            break;
+    }
+    if (!child || child->pid != pid)
+    {
+        set_status(ST_ERROR, "the " U64FMT " process does not exist or is not a child process", pid);
+        return 0;
+    }
+
+    CloseHandle(child->handle);
+    list_remove(&child->entry);
+    free(child);
+    return 1;
+}
+
 int platform_settime(uint64_t epoch, uint32_t leeway)
 {
     FILETIME filetime;
diff --git a/testbot/src/testagentd/testagentd.c b/testbot/src/testagentd/testagentd.c
index 292b1c7..dc181a6 100644
--- a/testbot/src/testagentd/testagentd.c
+++ b/testbot/src/testagentd/testagentd.c
@@ -37,8 +37,9 @@
  * 1.3:  Fix the zero / infinite timeouts in the wait2 RPC.
  * 1.4:  Add the settime RPC.
  * 1.5:  Add support for upgrading the server.
+ * 1.6:  Add support for the rmchildproc RPC.
  */
-#define PROTOCOL_VERSION "testagentd 1.5"
+#define PROTOCOL_VERSION "testagentd 1.6"
 
 #define BLOCK_SIZE       65536
 
@@ -88,6 +89,7 @@ enum rpc_ids_t
     RPCID_SETTIME,
     RPCID_GETPROPERTIES,
     RPCID_UPGRADE,
+    RPCID_RMCHILDPROC,
 };
 
 /* This is the RPC currently being processed */
@@ -108,6 +110,7 @@ static const char* rpc_name(uint32_t id)
         "settime",
         "getproperties",
         "upgrade",
+        "rmchildproc",
     };
 
     if (id < sizeof(names) / sizeof(*names))
@@ -837,6 +840,23 @@ static void do_wait2(SOCKET client)
         send_error(client);
 }
 
+static void do_rmchildproc(SOCKET client)
+{
+    uint64_t pid;
+
+    if (!expect_list_size(client, 1) ||
+        !recv_uint64(client, &pid))
+    {
+        send_error(client);
+        return;
+    }
+
+    if (platform_rmchildproc(client, pid))
+        send_list_size(client, 0);
+    else
+        send_error(client);
+}
+
 static void do_rm(SOCKET client)
 {
     int got_errors;
@@ -1090,6 +1110,9 @@ static void process_rpc(SOCKET client)
     case RPCID_UPGRADE:
         do_upgrade(client);
         break;
+    case RPCID_RMCHILDPROC:
+        do_rmchildproc(client);
+        break;
     default:
         do_unknown(client, rpcid);
     }
-- 
2.0.1




More information about the wine-patches mailing list