[tools] testbot/LogUtils: Deduplicate the latest WineTest reports.

Francois Gouget fgouget at codeweavers.com
Sun Feb 23 21:45:23 CST 2020


There is no need to keep old logs if all the errors they contain are
already present in the latest one. This cuts down on the number of reports
that test results need to be compared against to detect new failures.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---

I don't expect much deduplicating to happen initially because there are 
too many "always new" errors.

 testbot/bin/UpdateTaskLogs          | 21 +++++--
 testbot/lib/WineTestBot/LogUtils.pm | 95 ++++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 9 deletions(-)

diff --git a/testbot/bin/UpdateTaskLogs b/testbot/bin/UpdateTaskLogs
index 49f3c2e817..94adf422db 100755
--- a/testbot/bin/UpdateTaskLogs
+++ b/testbot/bin/UpdateTaskLogs
@@ -210,6 +210,9 @@ sub BuildErrorsCache($$$$;$)
   return CreateLogErrorsCache($LogInfo, $Task);
 }
 
+# The list of files in latest/. This includes files deleted for redundancy.
+my %LatestFiles;
+
 sub DoUpdateLatestReport($$$)
 {
   my ($Task, $ReportName, $SrcReportPath) = @_;
@@ -218,6 +221,8 @@ sub DoUpdateLatestReport($$$)
   my $SrcIsRef = ($SrcReportPath =~ /-job\d+-/);
   my $RefReportName = $SrcIsRef ? basename($SrcReportPath) :
                                   $Task->GetRefReportName($ReportName);
+  return 0 if ($LatestFiles{$RefReportName});
+  $LatestFiles{$RefReportName} = 1;
 
   my $Rc = 0;
   my $LatestReportPath = "$DataDir/latest/$RefReportName";
@@ -519,14 +524,18 @@ sub ProcessLatestReports()
     {
       $Rc += Delete("$LatestReportPath.errors", "orphaned");
     }
-    elsif (!-f "$LatestReportPath.errors")
+    else
     {
-      # Build the missing .errors file
-      my $ErrMessage = BuildErrorsCache("$DataDir/latest", $RefReportName, 1, 0);
-      if (defined $ErrMessage)
+      $LatestFiles{$RefReportName} = 1;
+      if (!-f "$LatestReportPath.errors")
       {
-        Error "$ErrMessage\n";
-        $Rc = 1;
+        # Build the missing .errors file
+        my $ErrMessage = BuildErrorsCache("$DataDir/latest", $RefReportName, 1, 0);
+        if (defined $ErrMessage)
+        {
+          Error "$ErrMessage\n";
+          $Rc = 1;
+        }
       }
     }
   }
diff --git a/testbot/lib/WineTestBot/LogUtils.pm b/testbot/lib/WineTestBot/LogUtils.pm
index f7b6f42513..e309753188 100644
--- a/testbot/lib/WineTestBot/LogUtils.pm
+++ b/testbot/lib/WineTestBot/LogUtils.pm
@@ -1028,9 +1028,9 @@ sub MarkAllErrorsAsNew($)
 
 =item C<_GetLineKey()>
 
-This is a helper for TagNewErrors(). It reformats the log lines so they
-can meaningfully be compared to the reference log even if line numbers change,
-etc.
+This is a helper for _DeduplicateLatestReport() and TagNewErrors(). It
+reformats the log lines so they can meaningfully be compared to the reference
+log even if line numbers change, etc.
 
 =back
 =cut
@@ -1244,6 +1244,92 @@ sub SnapshotLatestReport($$)
   return \@ErrMessages;
 }
 
+# Returns 1 if all the errors of $LogInfo are also in $RefInfo, making that
+# report redundant, and undef otherwise.
+sub _IsReportRedundant($$)
+{
+  my ($RefInfo, $LogInfo) = @_;
+  my $RefCount = $RefInfo->{ErrCount} || 0;
+  my $LogCount = $LogInfo->{ErrCount} || 0;
+  return undef if ($RefCount < $LogCount);
+  return 1 if ($LogCount == 0);
+
+  foreach my $Name (@{$LogInfo->{ErrGroupNames}})
+  {
+    my $LogEntry = $LogInfo->{ErrGroups}->{$Name};
+    my $RefEntry = $RefInfo->{ErrGroups}->{$Name};
+    return undef if (!$RefEntry);
+
+    my $Diff = Algorithm::Diff->new($RefEntry->{Errors}, $LogEntry->{Errors},
+                                    { keyGen => \&_GetLineKey });
+    while ($Diff->Next())
+    {
+      # Hunks that are identical on both sides also have a non-zero
+      # Items(2) count. Any other hunk with lines from $LogInfo means it
+      # has errors that $RefInfo lacks.
+      return undef if (!$Diff->Same() and $Diff->Items(2) > 0);
+    }
+  }
+  return 1;
+}
+
+# Removes the latest/ reports made redundant by $RefReportName, or that
+# report itself if a newer one already covers its errors. Only reports that
+# differ by their job id are compared. Returns an error message or undef.
+sub _DeduplicateLatestReport($)
+{
+  my ($RefReportName) = @_;
+
+  (my $LatestGlob = $RefReportName) =~ s/-job\d+-/-job*-/;
+  my @LatestPaths = glob("$DataDir/latest/$LatestGlob");
+  return undef if (@LatestPaths <= 1);
+
+  my $RefReportPath = "$DataDir/latest/$RefReportName";
+  my $RefInfo = LoadLogErrors($RefReportPath);
+  return $RefInfo->{BadLog} if (defined $RefInfo->{BadLog});
+
+  # -M returns the age in days so smaller values mean more recent files
+  my %LatestAges = map { ($_ => (-M $_ || 0)) } @LatestPaths;
+  my $RefAge = $LatestAges{$RefReportPath};
+
+  # \Q/\E are not interpreted inside an interpolated variable, so quote the
+  # parts here. NOTE(review): confirm names never contain '.' or '-'.
+  my $ReportRE = quotemeta($RefReportName);
+  $ReportRE = quotemeta($1) ."-job[0-9]+-". quotemeta($2)
+    if ($RefReportName =~ /^([a-zA-Z0-9_]+)-job\d+-([a-zA-Z0-9_]+)$/);
+
+  my $ErrMessage;
+  foreach my $LogPath (sort { $LatestAges{$a} <=> $LatestAges{$b} } @LatestPaths)
+  {
+    my $LogName = basename($LogPath);
+    next if ($LogName eq $RefReportName);
+    next if ($LogName !~ /^($ReportRE)$/);
+    $LogName = $1; # untaint
+    $LogPath = "$DataDir/latest/$LogName";
+
+    my $LogInfo = LoadLogErrors($LogPath);
+    if (defined $LogInfo->{BadLog})
+    {
+      # Take note of the error but continue to try deduplicating
+      $ErrMessage = $LogInfo->{BadLog};
+      next;
+    }
+    if ($RefAge < $LatestAges{$LogPath})
+    {
+      # This log is older than the reference report
+      unlink $LogPath, "$LogPath.errors"
+        if (_IsReportRedundant($RefInfo, $LogInfo));
+    }
+    elsif (_IsReportRedundant($LogInfo, $RefInfo))
+    {
+      # A more recent log covers the reference report so remove the latter
+      unlink $RefReportPath, "$RefReportPath.errors";
+      last;
+    }
+  }
+  return $ErrMessage;
+}
+
 sub UpdateLatestReport($$)
 {
   my ($RefReportName, $SrcReportPath) = @_;
@@ -1262,6 +1348,9 @@ sub UpdateLatestReport($$)
     }
   }
 
+  my $ErrMessage = _DeduplicateLatestReport($RefReportName);
+  push @ErrMessages, $ErrMessage if (defined $ErrMessage);
+
   return \@ErrMessages;
 }
 
-- 
2.20.1




More information about the wine-devel mailing list