Francois Gouget : testbot: Detect VM revert loops.

Alexandre Julliard julliard at winehq.org
Wed May 16 17:20:14 CDT 2018


Module: tools
Branch: master
Commit: 02681e7b8fd3186f446add2391e69c87ca3a00df
URL:    https://source.winehq.org/git/tools.git/?a=commit;h=02681e7b8fd3186f446add2391e69c87ca3a00df

Author: Francois Gouget <fgouget at codeweavers.com>
Date:   Wed May 16 11:22:10 2018 +0200

testbot: Detect VM revert loops.

VM revert loops typically happen when a VM is misconfigured such that
the TestBot fails to access the TestAgent daemon after reverting it.
This results in the VM being put offline until it is accessible again
through Libvirt which is the case so that it is immediately put back
online and reverted again leading to a new error.
With this patch the VM is put in maintenance mode for an administrator
to look at if it has too many consecutive errors.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 testbot/bin/LibvirtTool.pl         | 30 +++++++++++++++++++++++++-----
 testbot/ddl/update33.sql           |  5 +++++
 testbot/ddl/winetestbot.sql        |  1 +
 testbot/doc/winetestbot-schema.dia | 23 +++++++++++++++++++++++
 testbot/lib/WineTestBot/Config.pm  |  7 +++++--
 testbot/lib/WineTestBot/VMs.pm     |  1 +
 testbot/web/admin/VMDetails.pl     |  4 +++-
 7 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/testbot/bin/LibvirtTool.pl b/testbot/bin/LibvirtTool.pl
index f9a43bb..91fe053 100755
--- a/testbot/bin/LibvirtTool.pl
+++ b/testbot/bin/LibvirtTool.pl
@@ -193,23 +193,42 @@ sub FatalError($)
   my ($ErrMessage) = @_;
   Error $ErrMessage;
 
-  # Put the VM offline if nobody else modified its status before us
+  # Put the VM offline or mark it for maintenance
+  my $Errors = ($VM->Errors || 0) + 1;
+  my $NewStatus = $Errors < $MaxVMErrors ? "offline" : "maintenance";
+
   $VM = CreateVMs()->GetItem($VMKey);
-  $VM->Status("offline") if ($VM->Status eq $CurrentStatus);
-  $VM->ChildDeadline(undef);
-  $VM->ChildPid(undef);
+  # Put the VM offline if nobody else modified its status before us
+  if ($VM->Status eq $CurrentStatus)
+  {
+    $VM->Status($NewStatus);
+    $VM->ChildDeadline(undef);
+    $VM->ChildPid(undef);
+    $VM->Errors($Errors);
+  }
+  else
+  {
+    $NewStatus = "";
+  }
   my ($ErrProperty, $SaveErrMessage) = $VM->Save();
   if (defined $SaveErrMessage)
   {
     LogMsg "Could not put the $VMKey VM offline: $SaveErrMessage ($ErrProperty)\n";
   }
-  elsif ($VM->Status eq "offline")
+  elsif ($NewStatus eq "offline")
   {
     NotifyAdministrator("Putting the $VMKey VM offline",
                         "Could not perform the $Action operation on the $VMKey VM:\n".
                         "\n$ErrMessage\n".
                         "The VM has been put offline and the TestBot will try to regain access to it.");
   }
+  elsif ($NewStatus eq "maintenance")
+  {
+    NotifyAdministrator("The $VMKey VM needs maintenance",
+                        "Got ". $VM->Errors ." consecutive errors working on the $VMKey VM:\n".
+                        "\n$ErrMessage\n".
+                        "An administrator needs to look at it and to put it back online.");
+  }
   exit 1;
 }
 
@@ -244,6 +263,7 @@ sub ChangeStatus($$;$)
   $VM->Status($To);
   if ($Done)
   {
+    $VM->Errors(undef) if ($To eq "idle");
     $VM->ChildDeadline(undef);
     $VM->ChildPid(undef);
   }
diff --git a/testbot/ddl/update33.sql b/testbot/ddl/update33.sql
new file mode 100644
index 0000000..7239f1d
--- /dev/null
+++ b/testbot/ddl/update33.sql
@@ -0,0 +1,5 @@
+USE winetestbot;
+
+ALTER TABLE VMs
+  ADD Errors INT(2) NULL
+      AFTER Status;
diff --git a/testbot/ddl/winetestbot.sql b/testbot/ddl/winetestbot.sql
index 89810dd..32eab4e 100644
--- a/testbot/ddl/winetestbot.sql
+++ b/testbot/ddl/winetestbot.sql
@@ -49,6 +49,7 @@ CREATE TABLE VMs
   Type          ENUM('win32', 'win64', 'build') NOT NULL,
   Role          ENUM('extra', 'base', 'winetest', 'retired', 'deleted') NOT NULL,
   Status        ENUM('dirty', 'reverting', 'sleeping', 'idle', 'running', 'off', 'offline', 'maintenance') NOT NULL,
+  Errors        INT(2)           NULL,
   ChildPid      INT(5)           NULL,
   ChildDeadline DATETIME         NULL,
   VirtURI       VARCHAR(64)      NOT NULL,
diff --git a/testbot/doc/winetestbot-schema.dia b/testbot/doc/winetestbot-schema.dia
index 61a722a..2c720f2 100644
--- a/testbot/doc/winetestbot-schema.dia
+++ b/testbot/doc/winetestbot-schema.dia
@@ -2454,6 +2454,29 @@
         </dia:composite>
         <dia:composite type="table_attribute">
           <dia:attribute name="name">
+            <dia:string>#Errors#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="type">
+            <dia:string>#INT(2)#</dia:string>
+          </dia:attribute>
+          <dia:attribute name="comment">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+          <dia:attribute name="primary_key">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="nullable">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="unique">
+            <dia:boolean val="false"/>
+          </dia:attribute>
+          <dia:attribute name="default_value">
+            <dia:string>##</dia:string>
+          </dia:attribute>
+        </dia:composite>
+        <dia:composite type="table_attribute">
+          <dia:attribute name="name">
             <dia:string>#ChildPid#</dia:string>
           </dia:attribute>
           <dia:attribute name="type">
diff --git a/testbot/lib/WineTestBot/Config.pm b/testbot/lib/WineTestBot/Config.pm
index 2eb4f45..4f4c8d8 100644
--- a/testbot/lib/WineTestBot/Config.pm
+++ b/testbot/lib/WineTestBot/Config.pm
@@ -29,7 +29,7 @@ use vars qw (@ISA @EXPORT @EXPORT_OK $UseSSL $LogDir $DataDir $BinDir
              $DbDataSource $DbUsername $DbPassword $MaxRevertingVMs
              $MaxRevertsWhileRunningVMs $MaxActiveVMs $MaxRunningVMs
              $MaxVMsWhenIdle $SleepAfterRevert $WaitForToolsInVM
-             $VMToolTimeout $MaxTaskTries $AdminEMail $RobotEMail
+             $VMToolTimeout $MaxVMErrors $MaxTaskTries $AdminEMail $RobotEMail
              $WinePatchToOverride $WinePatchCc $SuiteTimeout $SingleTimeout
              $BuildTimeout $ReconfigTimeout $TimeoutMargin $TagPrefix
              $MaxUnitSize $ProjectName $PatchesMailingList $LDAPServer
@@ -43,7 +43,7 @@ require Exporter;
 @EXPORT = qw($UseSSL $LogDir $DataDir $BinDir
              $MaxRevertingVMs $MaxRevertsWhileRunningVMs $MaxActiveVMs
              $MaxRunningVMs $MaxVMsWhenIdle $SleepAfterRevert $WaitForToolsInVM
-             $VMToolTimeout $MaxTaskTries $AdminEMail
+             $VMToolTimeout $MaxVMErrors $MaxTaskTries $AdminEMail
              $RobotEMail $WinePatchToOverride $WinePatchCc $SuiteTimeout
              $SingleTimeout $BuildTimeout $ReconfigTimeout $TimeoutMargin
              $TagPrefix $MaxUnitSize $ProjectName $PatchesMailingList
@@ -82,6 +82,9 @@ $SleepAfterRevert = 0;
 # Take into account $WaitForToolsInVM and $SleepAfterRevert
 $VMToolTimeout = 6 * 60;
 
+# After three consecutive failures to revert a VM, put it in maintenance mode.
+$MaxVMErrors = 3;
+
 # How many times to run a test that fails before giving up.
 $MaxTaskTries = 3;
 
diff --git a/testbot/lib/WineTestBot/VMs.pm b/testbot/lib/WineTestBot/VMs.pm
index 9f5a2c7..a29eebb 100644
--- a/testbot/lib/WineTestBot/VMs.pm
+++ b/testbot/lib/WineTestBot/VMs.pm
@@ -667,6 +667,7 @@ my @PropertyDescriptors = (
   CreateEnumPropertyDescriptor("Type", "Type of VM", !1, 1, ['win32', 'win64', 'build']),
   CreateEnumPropertyDescriptor("Role", "VM Role", !1, 1, ['extra', 'base', 'winetest', 'retired', 'deleted']),
   CreateEnumPropertyDescriptor("Status", "Current status", !1, 1, ['dirty', 'reverting', 'sleeping', 'idle', 'running', 'off', 'offline', 'maintenance']),
+  CreateBasicPropertyDescriptor("Errors", "Errors", !1, !1, "N", 2),
   CreateBasicPropertyDescriptor("ChildPid", "Child process id", !1, !1, "N", 5),
   CreateBasicPropertyDescriptor("ChildDeadline", "Child Deadline", !1, !1, "DT", 19),
   CreateBasicPropertyDescriptor("VirtURI", "LibVirt URI of the VM", !1, 1, "A", 64),
diff --git a/testbot/web/admin/VMDetails.pl b/testbot/web/admin/VMDetails.pl
index 93e9276..0571471 100644
--- a/testbot/web/admin/VMDetails.pl
+++ b/testbot/web/admin/VMDetails.pl
@@ -40,7 +40,7 @@ sub DisplayProperty($$)
   my ($self, $PropertyDescriptor) = @_;
 
   my $PropertyName = $PropertyDescriptor->GetName();
-  return "" if ($PropertyName =~ /^(?:ChildPid|ChildDeadline)$/);
+  return "" if ($PropertyName =~ /^(?:ChildPid|ChildDeadline|Errors)$/);
   return $self->SUPER::DisplayProperty($PropertyDescriptor);
 }
 
@@ -53,6 +53,8 @@ sub Save($)
 
   if ($OldStatus ne $self->{Item}->Status)
   {
+    # The administrator action resets the consecutive error count
+    $self->{Item}->Errors(undef);
     my ($ErrProperty, $ErrMessage) = $self->{Item}->Validate();
     if (!defined $ErrMessage)
     {




More information about the wine-cvs mailing list