[PATCH 2/2] testbot: Requeue the task if the VM is not ready for it.

Francois Gouget fgouget at codeweavers.com
Mon Sep 23 09:59:31 CDT 2019


In order to run a task a VM should be in the 'running' state and powered
on. If either check fails that's not the fault of the task and a second
attempt may succeed. So don't end the task with a boterror. Instead
requeue it and put the VM offline so its true state is evaluated (and
notify the administrator of course).
This implies only resetting the VM error count after successfully
running a task otherwise we could get into an infinite loop putting it
offline at the start of every task.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---
 testbot/bin/LibvirtTool.pl               |  1 -
 testbot/bin/WineRunBuild.pl              | 32 ++++++++++++++++--
 testbot/bin/WineRunReconfig.pl           | 37 ++++++++++++++++++---
 testbot/bin/WineRunTask.pl               | 37 ++++++++++++++++++---
 testbot/bin/WineRunWineTest.pl           | 41 ++++++++++++++++++++----
 testbot/lib/WineTestBot/LibvirtDomain.pm | 13 ++++++++
 6 files changed, 143 insertions(+), 18 deletions(-)

diff --git a/testbot/bin/LibvirtTool.pl b/testbot/bin/LibvirtTool.pl
index b876f56e2..829eab132 100755
--- a/testbot/bin/LibvirtTool.pl
+++ b/testbot/bin/LibvirtTool.pl
@@ -241,7 +241,6 @@ sub ChangeStatus($$;$)
   $VM->Status($To);
   if ($Done)
   {
-    $VM->Errors(undef) if ($To eq "idle");
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
   }
diff --git a/testbot/bin/WineRunBuild.pl b/testbot/bin/WineRunBuild.pl
index a8b7803e9..3a1b649e7 100755
--- a/testbot/bin/WineRunBuild.pl
+++ b/testbot/bin/WineRunBuild.pl
@@ -5,7 +5,7 @@
 # See the bin/build/Build.pl script.
 #
 # Copyright 2009 Ge van Geldorp
-# Copyright 2013-2016 Francois Gouget
+# Copyright 2013-2019 Francois Gouget
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -250,6 +250,15 @@ sub WrapUpAndExit($;$$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    if ($NewVMStatus eq 'offline')
+    {
+      my $Errors = ($VM->Errors || 0) + 1;
+      $VM->Errors($Errors);
+    }
+    else
+    {
+      $VM->Errors(undef);
+    }
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
     $VM->Save();
@@ -261,6 +270,8 @@ sub WrapUpAndExit($;$$)
   exit($Status eq 'completed' ? 0 : 1);
 }
 
+# Only to be used if the error cannot be fixed by re-running the task.
+# The TestBot will be indicated as having caused the failure.
 sub FatalError($;$)
 {
   my ($ErrMessage, $Retry) = @_;
@@ -319,11 +330,26 @@ if ($VM->Type ne "build")
 }
 if (!$Debug and $VM->Status ne "running")
 {
-  FatalError("The VM is not ready for use (" . $VM->Status . ")\n");
+  # Maybe the administrator tinkered with the VM state? In any case the VM
+  # is not ours to use so requeue the task and abort.
+  Error("The VM is not ready for use (" . $VM->Status . ")\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The VM status should be 'running' but is '". $VM->Status ."' instead.\n".
+    "The VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
 if (!$VM->GetDomain()->IsPoweredOn())
 {
-  FatalError("The VM is not powered on\n");
+  # Maybe the VM was prepared in advance and got taken down by a power outage?
+  # Requeue the task and treat this event as a failed revert to avoid infinite
+  # loops.
+  Error("The VM is not powered on\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The VM is not powered on despite its status being 'running'.\n".
+    "The VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
 
 if ($Step->Type ne "build")
diff --git a/testbot/bin/WineRunReconfig.pl b/testbot/bin/WineRunReconfig.pl
index a2b7afedc..94ecabcf5 100755
--- a/testbot/bin/WineRunReconfig.pl
+++ b/testbot/bin/WineRunReconfig.pl
@@ -5,7 +5,7 @@
 # See the bin/build/Reconfig.pl script.
 #
 # Copyright 2009 Ge van Geldorp
-# Copyright 2013-2018 Francois Gouget
+# Copyright 2013-2019 Francois Gouget
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -253,6 +253,15 @@ sub WrapUpAndExit($;$$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    if ($NewVMStatus eq 'offline')
+    {
+      my $Errors = ($VM->Errors || 0) + 1;
+      $VM->Errors($Errors);
+    }
+    else
+    {
+      $VM->Errors(undef);
+    }
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
     $VM->Save();
@@ -264,6 +273,8 @@ sub WrapUpAndExit($;$$)
   exit($Status eq 'completed' ? 0 : 1);
 }
 
+# Only to be used if the error cannot be fixed by re-running the task.
+# The TestBot will be indicated as having caused the failure.
 sub FatalError($;$)
 {
   my ($ErrMessage, $Retry) = @_;
@@ -322,11 +333,29 @@ if ($VM->Type ne "build" and $VM->Type ne "wine")
 }
 if (!$Debug and $VM->Status ne "running")
 {
-  FatalError("The VM is not ready for use (" . $VM->Status . ")\n");
+  # Maybe the administrator tinkered with the VM state? In any case the VM
+  # is not ours to use so requeue the task and abort.
+  Error("The VM is not ready for use (" . $VM->Status . ")\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The VM status should be 'running' but is '". $VM->Status ."' instead.\n".
+    "The VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
-if (!$VM->GetDomain()->IsPoweredOn())
+my $Domain = $VM->GetDomain();
+if (!$Domain->IsPoweredOn())
 {
-  FatalError("The VM is not powered on\n");
+  # Maybe the VM was prepared in advance and got taken down by a power outage?
+  # Requeue the task and treat this event as a failed revert to avoid infinite
+  # loops.
+  Error("The VM is not powered on\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The ". $VM->Name ." VM should have been powered on to run the task\n".
+    "below but its state was ". $Domain->GetStateDescription() ." instead.\n".
+    MakeSecureURL(GetTaskURL($JobId, $StepNo, $TaskNo)) ."\n\n".
+    "So the VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
 
 if ($Step->Type ne "reconfig")
diff --git a/testbot/bin/WineRunTask.pl b/testbot/bin/WineRunTask.pl
index df9fcad0e..53aa578a5 100755
--- a/testbot/bin/WineRunTask.pl
+++ b/testbot/bin/WineRunTask.pl
@@ -4,7 +4,7 @@
 # Sends and runs the tasks in the Windows test VMs.
 #
 # Copyright 2009 Ge van Geldorp
-# Copyright 2013-2016 Francois Gouget
+# Copyright 2013-2019 Francois Gouget
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -284,6 +284,15 @@ sub WrapUpAndExit($;$$$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    if ($NewVMStatus eq 'offline')
+    {
+      my $Errors = ($VM->Errors || 0) + 1;
+      $VM->Errors($Errors);
+    }
+    else
+    {
+      $VM->Errors(undef);
+    }
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
     $VM->Save();
@@ -313,6 +322,8 @@ sub WrapUpAndExit($;$$$)
   exit($Status eq 'completed' ? 0 : 1);
 }
 
+# Only to be used if the error cannot be fixed by re-running the task.
+# The TestBot will be indicated as having caused the failure.
 sub FatalError($;$)
 {
   my ($ErrMessage, $Retry) = @_;
@@ -377,11 +388,29 @@ if ($VM->Type ne "win32" and $VM->Type ne "win64")
 }
 if (!$Debug and $VM->Status ne "running")
 {
-  FatalError("The VM is not ready for use (" . $VM->Status . ")\n");
+  # Maybe the administrator tinkered with the VM state? In any case the VM
+  # is not ours to use so requeue the task and abort.
+  Error("The VM is not ready for use (" . $VM->Status . ")\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The VM status should be 'running' but is '". $VM->Status ."' instead.\n".
+    "The VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
-if (!$VM->GetDomain()->IsPoweredOn())
+my $Domain = $VM->GetDomain();
+if (!$Domain->IsPoweredOn())
 {
-  FatalError("The VM is not powered on\n");
+  # Maybe the VM was prepared in advance and got taken down by a power outage?
+  # Requeue the task and treat this event as a failed revert to avoid infinite
+  # loops.
+  Error("The VM is not powered on\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The ". $VM->Name ." VM should have been powered on to run the task\n".
+    "below but its state was ". $Domain->GetStateDescription() ." instead.\n".
+    MakeSecureURL(GetTaskURL($JobId, $StepNo, $TaskNo)) ."\n\n".
+    "So the VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
 
 if ($Step->Type ne "single" and $Step->Type ne "suite")
diff --git a/testbot/bin/WineRunWineTest.pl b/testbot/bin/WineRunWineTest.pl
index 79d7519de..e3db27930 100755
--- a/testbot/bin/WineRunWineTest.pl
+++ b/testbot/bin/WineRunWineTest.pl
@@ -4,7 +4,7 @@
 # Makes sure the Wine patches compile or run WineTest.
 # See the bin/build/WineTest.pl script.
 #
-# Copyright 2018 Francois Gouget
+# Copyright 2018-2019 Francois Gouget
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -280,6 +280,15 @@ sub WrapUpAndExit($;$$$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    if ($NewVMStatus eq 'offline')
+    {
+      my $Errors = ($VM->Errors || 0) + 1;
+      $VM->Errors($Errors);
+    }
+    else
+    {
+      $VM->Errors(undef);
+    }
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
     $VM->Save();
@@ -313,6 +322,8 @@ sub WrapUpAndExit($;$$$)
   exit($Status eq 'completed' ? 0 : 1);
 }
 
+# Only to be used if the error cannot be fixed by re-running the task.
+# The TestBot will be indicated as having caused the failure.
 sub FatalError($;$)
 {
   my ($ErrMessage, $Retry) = @_;
@@ -377,11 +388,29 @@ if ($VM->Type ne "wine")
 }
 if (!$Debug and $VM->Status ne "running")
 {
-  FatalError("The VM is not ready for use (" . $VM->Status . ")\n");
-}
-if (!$VM->GetDomain()->IsPoweredOn())
-{
-  FatalError("The VM is not powered on\n");
+  # Maybe the administrator tinkered with the VM state? In any case the VM
+  # is not ours to use so requeue the task and abort.
+  Error("The VM is not ready for use (" . $VM->Status . ")\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The VM status should be 'running' but is '". $VM->Status ."' instead.\n".
+    "The VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
+}
+my $Domain = $VM->GetDomain();
+if (!$Domain->IsPoweredOn())
+{
+  # Maybe the VM was prepared in advance and got taken down by a power outage?
+  # Requeue the task and treat this event as a failed revert to avoid infinite
+  # loops.
+  Error("The VM is not powered on\n");
+  NotifyAdministrator("Putting the ". $VM->Name ." VM offline",
+    "The ". $VM->Name ." VM should have been powered on to run the task\n".
+    "below but its state was ". $Domain->GetStateDescription() ." instead.\n".
+    MakeSecureURL(GetTaskURL($JobId, $StepNo, $TaskNo)) ."\n\n".
+    "So the VM has been put offline and the TestBot will try to regain\n".
+    "access to it.");
+  WrapUpAndExit('queued');
 }
 
 if ($Step->Type ne "suite" and $Step->Type ne "single")
diff --git a/testbot/lib/WineTestBot/LibvirtDomain.pm b/testbot/lib/WineTestBot/LibvirtDomain.pm
index f3ac3f310..207dc6d45 100644
--- a/testbot/lib/WineTestBot/LibvirtDomain.pm
+++ b/testbot/lib/WineTestBot/LibvirtDomain.pm
@@ -394,6 +394,19 @@ sub IsReady($)
       );
 }
 
+sub GetStateDescription($)
+{
+  my ($self) = @_;
+
+  my ($ErrMessage, $Domain) = $self->_GetDomain();
+  return $ErrMessage if (defined $ErrMessage);
+
+  my ($State, $Reason);
+  eval { ($State, $Reason) = $Domain->get_state() };
+  return $self->_Reset($@) if ($@);
+  return _GetStateDescription($State, $Reason)
+}
+
 sub PowerOff($)
 {
   my ($self) = @_;
-- 
2.20.1



More information about the wine-devel mailing list