[1/2] testbot/TestAgent: Add a keepalive parameter to Wait().

Francois Gouget fgouget at codeweavers.com
Tue Mar 11 05:46:24 CDT 2014


This makes it possible to detect when the remote end crashes, freezes or is no longer reachable due to a network outage.
Also increase the leeway added to account for network transmission delays: 1 second is just too short.
---

Here's a first pair of patches which is tangentially related to making 
WineTestBot resilient in case of a network outage.

So this patch adds the infrastructure to detect network outages early by 
ensuring we have regular communication with the VM while waiting for the 
test to complete. That's the keepalive mechanism.

By increasing the time we allocate for the TestAgent server to send back 
its response, it should also reduce occurrences of the confusing 
'network read timeout' error message.


 testbot/lib/WineTestBot/TestAgent.pm | 79 ++++++++++++++++++++++++++++--------
 testbot/scripts/TestAgent            | 18 +++++---
 2 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/testbot/lib/WineTestBot/TestAgent.pm b/testbot/lib/WineTestBot/TestAgent.pm
index 299f63e..f313984 100644
--- a/testbot/lib/WineTestBot/TestAgent.pm
+++ b/testbot/lib/WineTestBot/TestAgent.pm
@@ -1100,40 +1100,83 @@ sub Run($$$;$$$)
   return $self->_RecvList('Q');
 }
 
-sub Wait($$$)
+=pod
+=over 12
+
+=item C<Wait()>
+
+Waits at most Timeout seconds for the specified remote process to terminate.
+The Keepalive specifies how often, in seconds, to check that the remote end
+is still alive and reachable.
+
+=back
+=cut
+
+sub Wait($$$;$)
 {
-  my ($self, $Pid, $Timeout) = @_;
-  debug("Wait $Pid, ", defined $Timeout ? $Timeout : "<undef>", "\n");
-  # Add 1 second for the reply to come back
-  my $OldTimeout = $self->SetTimeout($Timeout + 1) if ($Timeout);
+  my ($self, $Pid, $Timeout, $Keepalive) = @_;
+  debug("Wait $Pid, ", defined $Timeout ? $Timeout : "<undef>", ", ",
+        defined $Keepalive ? $Keepalive : "<undef>", "\n");
 
   # Make sure we have the server version
   return undef if (!$self->{agentversion} and !$self->_Connect());
 
-  # Send the command
+  my ($OldTimeout, $Result);
   if ($self->{agentversion} =~ / 1\.0$/)
   {
-      if (!$self->_StartRPC($RPC_WAIT) or
-          !$self->_SendListSize(1) or
-          !$self->_SendUInt64($Pid))
-      {
-          return undef;
-      }
+    # Add a 5 second leeway to take into account network transmission delays
+    $OldTimeout = $self->SetTimeout($Timeout + 5) if ($Timeout);
+
+    # Send the command
+    if (!$self->_StartRPC($RPC_WAIT) or
+        !$self->_SendListSize(1) or
+        !$self->_SendUInt64($Pid))
+    {
+      last;
+    }
+
+    # Get the reply
+    $Result = $self->_RecvList('I');
   }
   else
   {
+    $Keepalive ||= 0xffffffff;
+    $OldTimeout = $self->{timeout};
+
+    my $Deadline;
+    $Deadline = time() + $Timeout if ($Timeout);
+    while (1)
+    {
+      my $Remaining = $Keepalive;
+      if ($Deadline)
+      {
+        $Remaining = $Deadline - time();
+        last if ($Remaining < 0);
+        $Remaining = $Keepalive if ($Keepalive < $Remaining);
+      }
+      # Add a 5 second leeway to take into account network transmission delays
+      $self->SetTimeout($Remaining + 5);
+
+      # Send the command
       if (!$self->_StartRPC($RPC_WAIT2) or
           !$self->_SendListSize(2) or
           !$self->_SendUInt64($Pid) or
-          !$self->_SendUInt32(defined $Timeout ? $Timeout : 0xffffffff))
+          !$self->_SendUInt32($Remaining))
       {
-          return undef;
+        last;
       }
-  }
 
-  # Get the reply
-  my $Result = $self->_RecvList('I');
-  $self->SetTimeout($OldTimeout) if ($Timeout);
+      # Get the reply
+      $Result = $self->_RecvList('I');
+
+      # The process has quit
+      last if (defined $Result);
+
+      # Retry only if the timeout occurred on the remote end
+      last if ($self->{err} !~ /timed out waiting/);
+    }
+  }
+  $self->SetTimeout($OldTimeout);
   return $Result;
 }
 
diff --git a/testbot/scripts/TestAgent b/testbot/scripts/TestAgent
index 30e2fbc..4fa8f4f 100755
--- a/testbot/scripts/TestAgent
+++ b/testbot/scripts/TestAgent
@@ -3,7 +3,7 @@
 # This is a testagentd client. It can be used to exchange files or run
 # commands on the testagentd server, mostly for testing purposes.
 #
-# Copyright 2012 Francois Gouget
+# Copyright 2012-2014 Francois Gouget
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -56,7 +56,7 @@ my ($Cmd, $Hostname, $LocalFilename, $ServerFilename, @Rm);
 my (@Run, $RunIn, $RunOut, $RunErr, $WaitPid);
 my $SendFlags = 0;
 my $RunFlags = 0;
-my ($Port, $ConnectTimeout, $Timeout, $TunnelOpt);
+my ($Port, $ConnectTimeout, $Timeout, $Keepalive, $TunnelOpt);
 my $Usage;
 
 sub check_opt_val($$)
@@ -96,6 +96,10 @@ while (@ARGV)
     {
         $Timeout = check_opt_val($arg, $Timeout);
     }
+    elsif ($arg eq "--keepalive")
+    {
+        $Keepalive = check_opt_val($arg, $Keepalive);
+    }
     elsif ($arg eq "--tunnel")
     {
         $TunnelOpt = check_opt_val($arg, $TunnelOpt);
@@ -216,7 +220,7 @@ if (!defined $Usage)
         $Usage = 2;
     }
     elsif ($Cmd ne "run" and ($RunFlags or defined $RunIn or defined $RunOut or
-                           defined $RunErr))
+                           defined $RunErr or defined $Keepalive))
     {
         error("the --run-xxx options can only be used with the run command\n");
         $Usage = 2;
@@ -245,6 +249,7 @@ if (!defined $Usage)
         error("you must specify the server files to delete\n");
         $Usage = 2;
     }
+    $Keepalive = 60 if (!defined $Keepalive);
     $AgentPort = $Port if (defined $Port);
     if (!defined $AgentPort)
     {
@@ -301,6 +306,9 @@ if (defined $Usage)
     print "                connecting instead of the default one.\n";
     print "  --timeout <timeout> Use the specified timeout (in seconds) instead of the\n";
     print "                default one for the operation.\n";
+    print "  --keepalive <keepalive> How often (in seconds) the run and wait operations\n";
+    print "                should check that the remove end is still alive and reachable.\n";
+    print "                The default is 60 seconds.\n";
     print "  --tunnel <uri> Tunnel the connection through ssh. The SSH connection is\n";
     print "                specified in the form of an ssh:// URI.\n";
     print "  --help        Shows this usage message.\n";
@@ -341,14 +349,14 @@ elsif ($Cmd eq "run")
         print "Started process $Pid\n";
         if (!($RunFlags & $TestAgent::RUN_DNT))
         {
-            $Result = $TA->Wait($Pid, $Timeout);
+            $Result = $TA->Wait($Pid, $Timeout, $Keepalive);
             print "Child exit status: $Result\n" if (defined $Result);
         }
     }
 }
 elsif ($Cmd eq "wait")
 {
-    $Result = $TA->Wait($WaitPid, $Timeout);
+    $Result = $TA->Wait($WaitPid, $Timeout, $Keepalive);
     print "Child exit status: $Result\n" if (defined $Result);
 }
 elsif ($Cmd eq "rm")
-- 
1.8.5.3




More information about the wine-patches mailing list