[Tools 2/3] testbot: Deprecate Task::ChildPid in favor of VM::ChildPid.

Francois Gouget fgouget at codeweavers.com
Fri Oct 20 09:17:02 CDT 2017


This way the pid of the process working on a VM is always stored in the
same place which makes it easier to check if the VM is still 'busy'
when validating its status field.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---

This patch does not remove the ChildPid fied from the Tasks table so we 
can easily revert it if necessary. I'll send a patch to update the 
database once we're comfortable with this change.

 testbot/bin/Engine.pl            |  38 +++++-----
 testbot/bin/WineRunBuild.pl      |   2 +-
 testbot/bin/WineRunReconfig.pl   |   2 +-
 testbot/bin/WineRunTask.pl       |   2 +-
 testbot/lib/WineTestBot/Tasks.pm | 145 ++++++++++++++++-----------------------
 testbot/lib/WineTestBot/VMs.pm   |  28 ++++++--
 6 files changed, 104 insertions(+), 113 deletions(-)

diff --git a/testbot/bin/Engine.pl b/testbot/bin/Engine.pl
index e6755bcb..fe16f178 100755
--- a/testbot/bin/Engine.pl
+++ b/testbot/bin/Engine.pl
@@ -81,16 +81,17 @@ It has to contend with three main scenarios:
 - The Engine startup after a reboot. All task processes will be dead and need
   to be requeued. But the VMs are likely to be hosted on a separate machine so
   it is quite possible that they will still be running. Hopefully any running
-  process matching a Task's ChildPid will belong to another user so we don't
+  process matching a VM's ChildPid will belong to another user so we don't
   mistake that case for the previous one.
-- A shutdown of the Engine and its Tasks / VMs. In this case it's used to
-  kill the running tasks and requeue them, and/or power off the VMs.
+- A shutdown of the Engine and its Tasks / VMs. In this case Cleanup() is used
+  to kill the running tasks and requeue them, and/or power off the VMs.
 
-In all cases we only trust that a VM status field is still valid if:
-- It is 'running' and used by a Task that is still running.
-- It can have a running process and that process is still running.
-- It is 'idle' and powered on.
-In all other cases the VM will be powered off and marked as such.
+In all cases the VM status field cannot be trusted blindly.
+If the VM status indicates it can have a running process and it actually has
+a running process then we will trust the VM status.
+If the VM status is 'idle' we let RunCheckIdle() perform the relevant domain
+checks and update the status as appropriate.
+In all other cases the VM is powered off and marked as such.
 
 =back
 =cut
@@ -112,27 +113,22 @@ sub Cleanup(;$$)
       foreach my $Task (@{$Tasks->GetItems()})
       {
         my $TaskKey = join("/", $Job->Id, $Step->No, $Task->No);
-        my $ChildPid = $Task->ChildPid;
-        $ChildPid = undef if (defined $ChildPid and !kill(0, $ChildPid));
+        my $VM = $Task->VM;
 
         my $Requeue;
-        if (!defined $ChildPid)
+        if (!$VM->HasRunningChild())
         {
           # That task's process died somehow.
           $Requeue = 1;
         }
-        elsif ($Task->VM->Status ne "running")
+        elsif ($VM->Status ne "running")
         {
           # The Task and VM status should match.
           $Requeue = 1;
-          kill("TERM", $ChildPid);
         }
         elsif ($KillTasks)
         {
-          # Kill the task and requeue. Note that since the VM is still running
-          # we're not in the computer reboot case so ChildPid is probably
-          # still valid.
-          kill("TERM", $ChildPid);
+          # We will kill the Task's process so requeue the Task.
           $Requeue = 1;
         }
         else
@@ -147,7 +143,6 @@ sub Cleanup(;$$)
           LogMsg "Requeuing $TaskKey\n";
           system("rm", "-r", "-f", "$DataDir/jobs/$TaskKey");
           $Task->Status("queued");
-          $Task->ChildPid(undef);
           $Task->Started(undef);
           $Task->Ended(undef);
           $Task->TestFailures(undef);
@@ -178,7 +173,12 @@ sub Cleanup(;$$)
 
     if ($VM->HasRunningChild())
     {
-      if ($KillVMs)
+      if ($KillTasks and $VM->Status eq "running")
+      {
+        $VM->KillChild();
+        $VM->RunPowerOff();
+      }
+      elsif ($KillVMs and $VM->Status ne "running")
       {
         $VM->KillChild();
         # $KillVMs is normally used on shutdown so don't start a process that
diff --git a/testbot/bin/WineRunBuild.pl b/testbot/bin/WineRunBuild.pl
index f385feeb..e58d710a 100755
--- a/testbot/bin/WineRunBuild.pl
+++ b/testbot/bin/WineRunBuild.pl
@@ -226,7 +226,6 @@ sub WrapUpAndExit($;$)
   # Update the Task and Job
   $Task->Status($Status);
   $Task->TestFailures($TestFailures);
-  $Task->ChildPid(undef);
   if ($Status eq 'queued')
   {
     $Task->Started(undef);
@@ -245,6 +244,7 @@ sub WrapUpAndExit($;$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    $VM->ChildPid(undef);
     $VM->Save();
   }
 
diff --git a/testbot/bin/WineRunReconfig.pl b/testbot/bin/WineRunReconfig.pl
index aa529a2d..3eec9ed5 100755
--- a/testbot/bin/WineRunReconfig.pl
+++ b/testbot/bin/WineRunReconfig.pl
@@ -227,7 +227,6 @@ sub WrapUpAndExit($;$)
   # Update the Task and Job
   $Task->Status($Status);
   $Task->TestFailures($TestFailures);
-  $Task->ChildPid(undef);
   if ($Status eq 'queued')
   {
     $Task->Started(undef);
@@ -246,6 +245,7 @@ sub WrapUpAndExit($;$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    $VM->ChildPid(undef);
     $VM->Save();
   }
 
diff --git a/testbot/bin/WineRunTask.pl b/testbot/bin/WineRunTask.pl
index fc0bcf3a..65755f9b 100755
--- a/testbot/bin/WineRunTask.pl
+++ b/testbot/bin/WineRunTask.pl
@@ -255,7 +255,6 @@ sub WrapUpAndExit($;$$)
   # Update the Task and Job
   $Task->Status($Status);
   $Task->TestFailures($TestFailures);
-  $Task->ChildPid(undef);
   if ($Status eq 'queued')
   {
     $Task->Started(undef);
@@ -274,6 +273,7 @@ sub WrapUpAndExit($;$$)
   if ($VM->Status eq 'running')
   {
     $VM->Status($NewVMStatus);
+    $VM->ChildPid(undef);
     $VM->Save();
   }
 
diff --git a/testbot/lib/WineTestBot/Tasks.pm b/testbot/lib/WineTestBot/Tasks.pm
index 401e2757..bbb5d568 100644
--- a/testbot/lib/WineTestBot/Tasks.pm
+++ b/testbot/lib/WineTestBot/Tasks.pm
@@ -57,6 +57,30 @@ sub InitializeNew($$)
   $self->SUPER::InitializeNew($Collection);
 }
 
+sub _SetupTask($$)
+{
+  my ($VM, $Task) = @_;
+
+  # Capture Perl errors in the task's generic error log
+  my ($JobId, $StepNo, $TaskNo) = @{$Task->GetMasterKey()};
+  my $TaskDir = "$DataDir/jobs/$JobId/$StepNo/$TaskNo";
+  # Remove the previous run's files if any
+  rmtree $TaskDir;
+  mkdir $TaskDir;
+  if (open(STDERR, ">>", "$TaskDir/err"))
+  {
+    # Make sure stderr still flushes after each print
+    my $tmp=select(STDERR);
+    $| = 1;
+    select($tmp);
+  }
+  else
+  {
+    require WineTestBot::Log;
+    WineTestBot::Log::LogMsg("unable to redirect stderr to '$TaskDir/err': $!\n");
+  }
+}
+
 =pod
 =over 12
 
@@ -76,70 +100,20 @@ sub Run($$)
 {
   my ($self, $Step) = @_;
 
-  $self->Status("running");
-  $self->Save();
-
-  my $VM = $self->VM;
-  $VM->Status("running");
-  my ($ErrProperty, $ErrMessage) = $VM->Save();
-  return $ErrMessage if (defined $ErrMessage);
+  my ($JobId, $StepNo, $TaskNo) = @{$self->GetMasterKey()};
+  my $Script = $Step->Type eq "build" ? "Build" :
+               $Step->Type eq "reconfig" ? "Reconfig" :
+               "Task";
+  my $Args = ["$BinDir/${ProjectName}Run$Script.pl", "--log-only",
+              $JobId, $StepNo, $TaskNo];
 
-  my $RunScript;
-  if ($Step->Type eq "build")
-  {
-    $RunScript = "RunBuild.pl";
-  }
-  elsif ($Step->Type eq "reconfig")
-  {
-    $RunScript = "RunReconfig.pl";
-  }
-  else
-  {
-    $RunScript = "RunTask.pl";
-  }
-
-  # Make sure the child process does not inherit the database connection
-  $self->GetBackEnd()->Close();
-
-  my $Pid = fork;
-  if (!defined $Pid)
-  {
-    return "Unable to fork for ${ProjectName}$RunScript: $!";
-  }
-  elsif (!$Pid)
+  my $ErrMessage = $self->VM->Run("running", $Args, \&_SetupTask, $self);
+  if (!$ErrMessage)
   {
-    require WineTestBot::Log;
-    # Capture Perl errors in the task's generic error log
-    my ($JobId, $StepNo, $TaskNo) = @{$self->GetMasterKey()};
-    my $TaskDir = "$DataDir/jobs/$JobId/$StepNo/$TaskNo";
-    # Remove the previous run's files if any
-    rmtree $TaskDir;
-    mkdir $TaskDir;
-    if (open(STDERR, ">>", "$TaskDir/err"))
-    {
-      # Make sure stderr still flushes after each print
-      my $tmp=select(STDERR);
-      $| = 1;
-      select($tmp);
-    }
-    else
-    {
-      WineTestBot::Log::LogMsg("unable to redirect stderr to '$TaskDir/err': $!\n");
-    }
-    $ENV{PATH} = "/usr/bin:/bin";
-    delete $ENV{ENV};
-    exec("$BinDir/${ProjectName}$RunScript", "--log-only", $JobId, $StepNo, $TaskNo) or
-    WineTestBot::Log::LogMsg("Unable to exec ${ProjectName}$RunScript: $!\n");
-    exit(1);
+    $self->Started(time());
+    my $_ErrProperty;
+    ($_ErrProperty, $ErrMessage) = $self->Save();
   }
-
-  # Note that if the child process completes quickly (typically due to some
-  # error), it may set ChildPid to undef before we get here. So we may end up
-  # with non-running tasks for which ChildPid is set. That's ok because
-  # ChildPid should be ignored anyway if Status is not 'running'.
-  $self->ChildPid($Pid);
-  $self->Started(time);
-  ($ErrProperty, $ErrMessage) = $self->Save();
   return $ErrMessage;
 }
 
@@ -154,34 +128,37 @@ sub UpdateStatus($$)
   my ($self, $Skip) = @_;
 
   my $Status = $self->Status;
+  my $VM = $self->VM;
 
-  if (defined $self->ChildPid && !kill(0, $self->ChildPid) && $! == ESRCH)
+  if ($Status eq "running" and
+      ($VM->Status ne "running" or !$VM->HasRunningChild()))
   {
-    $self->ChildPid(undef);
-    if ($Status eq "running")
+    my ($JobId, $StepNo, $TaskNo) = @{$self->GetMasterKey()};
+    my $OldUMask = umask(002);
+    my $TaskDir = "$DataDir/jobs/$JobId/$StepNo/$TaskNo";
+    mkdir $TaskDir;
+    if (open TASKLOG, ">>$TaskDir/err")
+    {
+      print TASKLOG "Child process died unexpectedly\n";
+      close TASKLOG;
+    }
+    umask($OldUMask);
+    # This probably indicates a bug in the task script.
+    # Don't requeue the task to avoid an infinite loop.
+    require WineTestBot::Log;
+    WineTestBot::Log::LogMsg("Child process for task $JobId/$StepNo/$TaskNo died unexpectedly\n");
+    $self->Status("boterror");
+    $self->Save();
+
+    if ($VM->Status eq "running")
     {
-      my ($JobId, $StepNo, $TaskNo) = @{$self->GetMasterKey()};
-      my $OldUMask = umask(002);
-      my $TaskDir = "$DataDir/jobs/$JobId/$StepNo/$TaskNo";
-      mkdir $TaskDir;
-      if (open TASKLOG, ">>$TaskDir/err")
-      {
-        print TASKLOG "Child process died unexpectedly\n";
-        close TASKLOG;
-      }
-      umask($OldUMask);
-      # This probably indicates a bug in the task script.
-      # Don't requeue the task to avoid an infinite loop.
-      require WineTestBot::Log;
-      WineTestBot::Log::LogMsg("Child process for task $JobId/$StepNo/$TaskNo died unexpectedly\n");
-      $self->Status("boterror");
-      $Status = "boterror";
-
-      my $VM = $self->VM;
       $VM->Status('dirty');
+      $VM->ChildPid(undef);
       $VM->Save();
     }
-    $self->Save();
+    # else it looks like this is not our VM anymore
+
+    $Status = "boterror";
   }
   elsif ($Skip && $Status eq "queued")
   {
@@ -222,7 +199,7 @@ BEGIN
     CreateItemrefPropertyDescriptor("VM", "VM", !1,  1, \&CreateVMs, ["VMName"]),
     CreateBasicPropertyDescriptor("Timeout", "Timeout", !1, 1, "N", 4),
     CreateBasicPropertyDescriptor("CmdLineArg", "Command line args", !1, !1, "A", 256),
-    # Note: ChildPid is only valid when Status == 'running'.
+    # Note: ChildPid is not used anymore.
     CreateBasicPropertyDescriptor("ChildPid", "Child process id", !1, !1, "N", 5),
     CreateBasicPropertyDescriptor("Started", "Execution started", !1, !1, "DT", 19),
     CreateBasicPropertyDescriptor("Ended", "Execution ended", !1, !1, "DT", 19),
diff --git a/testbot/lib/WineTestBot/VMs.pm b/testbot/lib/WineTestBot/VMs.pm
index e71ad218..7bd6d64b 100644
--- a/testbot/lib/WineTestBot/VMs.pm
+++ b/testbot/lib/WineTestBot/VMs.pm
@@ -307,12 +307,11 @@ sub OnSaved($)
   }
 }
 
-sub _RunVMTool($$$)
+sub Run($$$$$)
 {
-  my ($self, $NewStatus, $Args) = @_;
+  my ($self, $NewStatus, $Args, $ChildSetup, $SetupData) = @_;
 
-  my $Tool = "LibvirtTool.pl";
-  unshift @$Args, "$BinDir/$Tool";
+  my $Tool = basename($Args->[0]);
 
   # There are two $VM->ChildPid race conditions to avoid:
   # - Between the child process and new calls to ScheduleJobs().
@@ -377,9 +376,7 @@ sub _RunVMTool($$$)
 
   require WineTestBot::Log;
   WineTestBot::Log::LogMsg("Starting child: @$Args\n");
-
-  # Set up the file descriptors for the new process
-  WineTestBot::Log::SetupRedirects();
+  $ChildSetup->($VM, $SetupData);
 
   $ENV{PATH} = "/usr/bin:/bin";
   exec(@$Args) or
@@ -396,6 +393,23 @@ sub _RunVMTool($$$)
   exit 1;
 }
 
+sub _SetupChild($$)
+{
+  my ($VM, $_UserData) = @_;
+
+  # Set up the file descriptors for the new process
+  WineTestBot::Log::SetupRedirects();
+}
+
+sub _RunVMTool($$$)
+{
+  my ($self, $NewStatus, $Args) = @_;
+
+  my $Tool = "LibvirtTool.pl";
+  unshift @$Args, "$BinDir/$Tool";
+  return $self->Run($NewStatus, $Args, \&_SetupChild, undef);
+}
+
 =pod
 =over 12
 
-- 
2.14.2




More information about the wine-patches mailing list