[PATCH] testbot/TestAgent: Modify and document the connection timeout handling.
Francois Gouget
fgouget at codeweavers.com
Wed Sep 18 03:08:57 CDT 2019
The new model better handles the case where we need to wait for the VM
to boot before we can connect to the testagentd server.
Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---
testbot/bin/LibvirtTool.pl | 9 +-
testbot/lib/WineTestBot/Config.pm | 15 ++-
testbot/lib/WineTestBot/TestAgent.pm | 169 ++++++++++++++++++++++++---
testbot/lib/WineTestBot/VMs.pm | 8 +-
testbot/scripts/SetWinLocale | 2 +-
testbot/scripts/TestAgent | 30 +++--
6 files changed, 181 insertions(+), 52 deletions(-)
diff --git a/testbot/bin/LibvirtTool.pl b/testbot/bin/LibvirtTool.pl
index 5c3906503..f15bab70b 100755
--- a/testbot/bin/LibvirtTool.pl
+++ b/testbot/bin/LibvirtTool.pl
@@ -408,12 +408,11 @@ sub Revert()
# The VM is now sleeping which may allow some tasks to run
return 1 if (ChangeStatus("reverting", "sleeping"));
- # Check the TestAgent connection. Note that this may take some time
- # if the VM needs to boot first.
- Debug(Elapsed($Start), " Trying the TestAgent connection\n");
- LogMsg "Waiting for ". $VM->Name ." (up to ${WaitForToolsInVM}s per attempt)\n";
+ # Verify that the TestAgent server accepts connections
+ Debug(Elapsed($Start), " Verifying the TestAgent server\n");
+ LogMsg "Verifying the $VMKey TestAgent server\n";
my $TA = $VM->GetAgent();
- $TA->SetConnectTimeout($WaitForToolsInVM, undef, $WaitForToolsInVM);
+ $TA->SetConnectTimeout(undef, undef, $WaitForBoot);
my $Success = $TA->Ping();
$TA->Disconnect();
if (!$Success)
diff --git a/testbot/lib/WineTestBot/Config.pm b/testbot/lib/WineTestBot/Config.pm
index 3d9214bae..3496c310e 100644
--- a/testbot/lib/WineTestBot/Config.pm
+++ b/testbot/lib/WineTestBot/Config.pm
@@ -28,7 +28,7 @@ WineTestBot::Config - Site-independent configuration settings
use vars qw (@ISA @EXPORT @EXPORT_OK $UseSSL $LogDir $DataDir $BinDir
%RepoURLs $DbDataSource $DbUsername $DbPassword $MaxRevertingVMs
$MaxRevertsWhileRunningVMs $MaxActiveVMs $MaxRunningVMs
- $MaxVMsWhenIdle $SleepAfterRevert $WaitForToolsInVM
+ $MaxVMsWhenIdle $SleepAfterRevert $WaitForBoot
$VMToolTimeout $MaxVMErrors $MaxTaskTries $AdminEMail $RobotEMail
$WinePatchToOverride $WinePatchCc
$ExeBuildNativeTimeout $ExeBuildTestTimeout $ExeModuleTimeout
@@ -44,7 +44,7 @@ require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw($UseSSL $LogDir $DataDir $BinDir %RepoURLs
$MaxRevertingVMs $MaxRevertsWhileRunningVMs $MaxActiveVMs
- $MaxRunningVMs $MaxVMsWhenIdle $SleepAfterRevert $WaitForToolsInVM
+ $MaxRunningVMs $MaxVMsWhenIdle $SleepAfterRevert $WaitForBoot
$VMToolTimeout $MaxVMErrors $MaxTaskTries $AdminEMail
$RobotEMail $WinePatchToOverride $WinePatchCc $SuiteTimeout
$ExeBuildNativeTimeout $ExeBuildTestTimeout $ExeModuleTimeout
@@ -90,14 +90,13 @@ $MaxActiveVMs = 2;
$MaxRunningVMs = 1;
$MaxVMsWhenIdle = undef;
-# How long to wait for each of the 3 connection attempts to the VM's TestAgent
-# server after a revert (in seconds). If there are powered off snapshots this
-# must be long enough for the VM to boot up first.
-$WaitForToolsInVM = 30;
-# How long to let the VM settle down after the revert before starting a task on
+# How long to attempt to connect to the TestAgent while the VM is booting.
+$WaitForBoot = 90;
+# How long to let the VM settle down after a revert before starting a task on
# it (in seconds).
$SleepAfterRevert = 0;
-# Take into account $WaitForToolsInVM and $SleepAfterRevert
+# How long to wait before considering a VM operation is stuck. In particular
+# for reverts this should take into account the time it may need to boot.
$VMToolTimeout = 6 * 60;
# After three consecutive failures to revert a VM, put it in maintenance mode.
diff --git a/testbot/lib/WineTestBot/TestAgent.pm b/testbot/lib/WineTestBot/TestAgent.pm
index d3d7137a5..8d7f456fd 100644
--- a/testbot/lib/WineTestBot/TestAgent.pm
+++ b/testbot/lib/WineTestBot/TestAgent.pm
@@ -101,9 +101,9 @@ sub new($$$;$)
agenthost => $Hostname,
agentport => $Port,
connection => "$Hostname:$Port",
- ctimeout => 20,
- cattempts => 3,
- cinterval => 0,
+ conetimeout => 20,
+ cminattempts => 2,
+ cmintimeout => 10,
timeout => 0,
fd => undef,
deadline => undef,
@@ -140,28 +140,103 @@ sub Disconnect($)
$self->{agentversion} = undef;
}
+=pod
+=over 12
+
+=item C<SetConnectTimeout()>
+
+Configures how many times and for how long to attempt to connect to the server.
+
+=item OneTimeout
+
+OneTimeout specifies the timeout for one connection attempt in seconds.
+Zero means there will be no timeout. Undef means the value is not changed.
+
+=item MinAttempts
+
+MinAttempts specifies the minimum number of connection attempts. It must be a
+non-zero positive integer. Undef means the value is not changed.
+
+=item MinTimeout
+
+MinTimeout specifies the minimum period during which connection attempts should
+be made in seconds. Zero means there will be no minimum timeout. Undef means
+the value is not changed.
+
+This means connection attempts will be made until either one succeeds or
+MinTimeout seconds have elapsed, even if each attempt fails quickly such that
+more than $MinAttempts attempts are performed. Conversely, if each attempt
+takes a long time such that more MinTimeout is reached before MinAttempt
+connection attempts have been made, then attempts will continue until the
+minimum number of attempts is reached.
+
+Thus the worst case connection timeout is:
+
+ max($MinAttempts * $OneTimeout, $MinTimeout + $OneTimeout)
+
+=item Return value
+
+For each modified value (i.e. not undef), the old value is returned.
+
+=item Usage
+
+To perform a single connection attempt with a 20 second timeout:
+
+ $TA->SetConnectionTimeout(20, 1, 0);
+
+For a bit more robustness in case one expects short network disruptions:
+
+ $TA->SetConnectionTimeout(20, 2, 10);
+
+But if the server needs time to boot before accepting connections, then
+MinTimeout should be set to the amount of time this is expected to take. It
+would make sense to reset MinTimeout after the first RPC has completed since
+then a reconnection would not have to wait for the server to boot again:
+
+ my $OldMinTimeout = $TA->SetConnectionTimeout(undef, undef, 90);
+
+ ... first RPC ...
+
+ $TA->SetConnectionTimeout(undef, undef, $OldMinTimeout);
+
+=back
+=cut
+
sub SetConnectTimeout($$;$$)
{
- my ($self, $Timeout, $Attempts, $Interval) = @_;
+ my ($self, $OneTimeout, $MinAttempts, $MinTimeout) = @_;
my @Ret;
- if (defined $Timeout)
+ if (defined $OneTimeout)
{
- push @Ret, $self->{ctimeout};
- $self->{ctimeout} = $Timeout;
+ push @Ret, $self->{conetimeout};
+ $self->{conetimeout} = $OneTimeout;
}
- if (defined $Attempts)
+ if (defined $MinAttempts)
{
- push @Ret, $self->{cattempts};
- $self->{cattempts} = $Attempts;
+ push @Ret, $self->{cminattempts};
+ $self->{cminattempts} = $MinAttempts;
}
- if (defined $Interval)
+ if (defined $MinTimeout)
{
- push @Ret, $self->{cinterval};
- $self->{cinterval} = $Interval;
+ push @Ret, $self->{cmintimeout};
+ $self->{cmintimeout} = $MinTimeout;
}
return @Ret;
}
+=pod
+=over 12
+
+=item C<SetTimeout()>
+
+Configures how long an individual RPC can take to complete (in seconds).
+
+Note that some operations like Wait() and Wait2() involve multiple RPCs. These
+have their own timeouts.
+
+=back
+=cut
+
sub SetTimeout($$)
{
my ($self, $Timeout) = @_;
@@ -857,14 +932,16 @@ sub _Connect($)
my $OldRPC = $self->{rpc};
$self->{rpc} = ($self->{rpc} ? "$self->{rpc}/" : "") ."connect";
- my $Start = time();
- foreach my $Attempt (1..$self->{cattempts})
+ my $Attempt = 1;
+ my $MinDeadline = $self->{cmintimeout} ? time() + $self->{cmintimeout} : 0;
+ while (1)
{
my $Step = "initializing";
eval
{
local $SIG{ALRM} = sub { die "timeout" };
- $self->{deadline} = $self->{ctimeout} ? time() + $self->{ctimeout} : undef;
+ my $OneDeadline = $self->{conetimeout} ? time() + $self->{conetimeout} : 0;
+ $self->{deadline} = ($OneDeadline < $MinDeadline) ? $MinDeadline : $OneDeadline;
$self->_SetAlarm();
if ($self->{tunnel})
@@ -945,8 +1022,13 @@ sub _Connect($)
last;
}
- my $Remaining = $Attempt * $self->{cinterval} - (time() - $Start);
- sleep($Remaining) if ($Remaining > 0);
+ $Attempt++;
+ last if ($Attempt > $self->{cminattempts} and $MinDeadline <= time() + 1);
+
+ # Wait at least 1 second between attempts so that if the error happens on
+ #the client-side (e.g. in case of an invalid ssh argument) we don't busy
+ # loop for $MinTimeout seconds.
+ sleep(1);
}
$self->{rpc} = $OldRPC;
return undef;
@@ -1023,6 +1105,19 @@ sub _SendStringOrFile($$$$$$)
$self->_RecvList('');
}
+=pod
+=over 12
+
+=item C<SendFile()>
+
+Sends the $LocalPathName file and saves it as the $ServerPathName file on the
+server. If $Flags is set to $SENDFILE_EXE the file will be made executable.
+
+Note that the transfer must complete within the SetTimeout() limit.
+
+=back
+=cut
+
sub SendFile($$$;$)
{
my ($self, $LocalPathName, $ServerPathName, $Flags) = @_;
@@ -1039,6 +1134,19 @@ sub SendFile($$$;$)
return undef;
}
+=pod
+=over 12
+
+=item C<SendFileFromString()>
+
+Sends the $Data string and saves it as the $ServerPathName file on the
+server. If $Flags is set to $SENDFILE_EXE the file will be made executable.
+
+Note that the transfer must complete within the SetTimeout() limit.
+
+=back
+=cut
+
sub SendFileFromString($$$;$)
{
my ($self, $Data, $ServerPathName, $Flags) = @_;
@@ -1059,6 +1167,19 @@ sub _GetStringOrFile($$$)
$self->_RecvString('String', 'd'));
}
+=pod
+=over 12
+
+=item C<GetFile()>
+
+Retrieves the $ServerPathName file from the server and saves it as
+$LocalPathName.
+
+Note that the transfer must complete within the SetTimeout() limit.
+
+=back
+=cut
+
sub GetFile($$$)
{
my ($self, $ServerPathName, $LocalPathName) = @_;
@@ -1075,6 +1196,18 @@ sub GetFile($$$)
return undef;
}
+=pod
+=over 12
+
+=item C<GetFileToString()>
+
+Retrieves the $ServerPathName file from the server returns it as a string.
+
+Note that the transfer must complete within the SetTimeout() limit.
+
+=back
+=cut
+
sub GetFileToString($$)
{
my ($self, $ServerPathName) = @_;
diff --git a/testbot/lib/WineTestBot/VMs.pm b/testbot/lib/WineTestBot/VMs.pm
index 2bf350435..d85df1d08 100644
--- a/testbot/lib/WineTestBot/VMs.pm
+++ b/testbot/lib/WineTestBot/VMs.pm
@@ -584,10 +584,10 @@ sub RunPowerOff($)
Reverts the VM so that it is ready to run jobs.
-Note that in addition to the hypervisor revert operation this implies checking
-that it responds to our commands ($WaitForToolsInVM) and possibly letting the
-VM settle down ($SleepAfterRevert). If this operation fails the administrator
-is notified and the VM is marked as offline.
+Note that in addition to the hypervisor revert operation this may imply waiting
+for the VM to boot, checking that the TestAgent server is accessible, and
+possibly letting the VM settle down ($SleepAfterRevert). If this operation
+fails the administrator is notified and the VM is put offline.
This operation can take a long time so it is performed in a separate process.
diff --git a/testbot/scripts/SetWinLocale b/testbot/scripts/SetWinLocale
index 2690dbb1f..ca750b792 100755
--- a/testbot/scripts/SetWinLocale
+++ b/testbot/scripts/SetWinLocale
@@ -843,7 +843,7 @@ if ($OptRefresh)
# Allow up to 10 minutes for the shutdown plus reboot.
Debug(Elapsed($Start), " Waiting for Windows to boot\n");
- $TA->SetConnectTimeout(20, 30);
+ $TA->SetConnectTimeout(undef, undef, $WaitForBoot);
if (!$DryRun and !$TA->Ping())
{
FatalError("could not reconnect to $OptHostName after the reboot: ", $TA->GetLastError(), "\n");
diff --git a/testbot/scripts/TestAgent b/testbot/scripts/TestAgent
index 25fc0d426..b4ec582be 100755
--- a/testbot/scripts/TestAgent
+++ b/testbot/scripts/TestAgent
@@ -56,7 +56,7 @@ my ($Cmd, $Hostname, $LocalFilename, $ServerFilename, $PropName, $PropValue, @Rm
my (@Run, $RunIn, $RunOut, $RunErr, $WaitPid);
my $SendFlags = 0;
my $RunFlags = 0;
-my ($Port, $ConnectTimeout, $ConnectAttempts, $ConnectInterval, $Timeout);
+my ($Port, $ConnectOneTimeout, $ConnectMinAttempts, $ConnectMinTimeout, $Timeout);
my ($Keepalive, $TunnelOpt);
my $Usage;
@@ -100,17 +100,17 @@ while (@ARGV)
{
$Port = check_opt_val($arg, $Port);
}
- elsif ($arg eq "--connect-timeout")
+ elsif ($arg eq "--connect-one-timeout")
{
- $ConnectTimeout = check_opt_val($arg, $ConnectTimeout);
+ $ConnectOneTimeout = check_opt_val($arg, $ConnectOneTimeout);
}
- elsif ($arg eq "--connect-attempts")
+ elsif ($arg eq "--connect-min-attempts")
{
- $ConnectAttempts = check_opt_val($arg, $ConnectAttempts);
+ $ConnectMinAttempts = check_opt_val($arg, $ConnectMinAttempts);
}
- elsif ($arg eq "--connect-interval")
+ elsif ($arg eq "--connect-min-timeout")
{
- $ConnectInterval = check_opt_val($arg, $ConnectInterval);
+ $ConnectMinTimeout = check_opt_val($arg, $ConnectMinTimeout);
}
elsif ($arg eq "--timeout")
{
@@ -361,11 +361,12 @@ if (defined $Usage)
print " restarts it.\n";
print " <hostname> Is the hostname of the server.\n";
print " --port <port> Use the specified port number instead of the default one.\n";
- print " --connect-timeout <timeout> Use the specified timeout (in seconds) when\n";
- print " connecting instead of the default one.\n";
- print " --connect-attempts <count> How many connection attempts to perform.\n";
- print " --connect-interval <time> The minimum interval (in seconds) between failed\n";
- print " connection attempts.\n";
+ print " --connect-one-timeout <time> Specifies the timeout for one connection\n";
+ print " attempt (in seconds).\n";
+ print " --connect-min-attempts <count> Specifies the minimum number of connection\n";
+ print " attempts.\n";
+ print " --connect-min-timeout <time> The minimum period (in seconds) during which\n";
+ print " connection attempts should be made.\n";
print " --timeout <timeout> Use the specified timeout (in seconds) instead of the\n";
print " default one for the operation.\n";
print " --keepalive <keepalive> How often (in seconds) the run and wait operations\n";
@@ -389,10 +390,7 @@ if ($TunnelOpt and $TunnelOpt =~ /^ssh:/)
}
my $TA = TestAgent->new($Hostname, $AgentPort, $TunnelInfo);
-if (defined $ConnectTimeout or defined $ConnectAttempts or defined $ConnectInterval)
-{
- $TA->SetConnectTimeout($ConnectTimeout, $ConnectAttempts, $ConnectInterval);
-}
+$TA->SetConnectTimeout($ConnectOneTimeout, $ConnectMinAttempts, $ConnectMinTimeout);
$TA->SetTimeout($Timeout) if (defined $Timeout);
my $RC = 0;
--
2.20.1
More information about the wine-devel
mailing list