Francois Gouget : winetest/build-patterns: Detect fixed failures at the report level.

Fri Apr 30 15:44:59 CDT 2021

Module: tools
Branch: master
Commit: 3928647cdad708cac2549e39e486eb2fdce5a116
URL:    https://source.winehq.org/git/tools.git/?a=commit;h=3928647cdad708cac2549e39e486eb2fdce5a116

Author: Francois Gouget <fgouget at codeweavers.com>
Date:   Fri Apr 30 14:56:16 2021 +0200

winetest/build-patterns: Detect fixed failures at the report level.

If a test configuration had a high failure rate and no longer has any
failure, it is not necessary to wait for $patternbuilds builds to
consider the failure fixed.
Then if the failures are fixed in all test configurations, the test unit
can be moved to the old/fixed failures list.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 winetest/build-patterns | 81 +++++++++++++++++++++++++++++++++++++++++++++++--
 winetest/winetest.conf  |  3 ++
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/winetest/build-patterns b/winetest/build-patterns
index 32bae6b..6a14fc3 100755
--- a/winetest/build-patterns
+++ b/winetest/build-patterns
@@ -31,7 +31,7 @@ sub BEGIN
     }
     unshift @INC, $1 if ($0 =~ m=^(/.*)/[^/]+$=);
 }
-use vars qw/$workdir $gitdir $patternbuilds/;
+use vars qw/$workdir $gitdir $patternbuilds $fixed_threshold/;
 require "winetest.conf";
 
 my $name0=$0;
@@ -275,6 +275,20 @@ my %reports;
 #     The @sortedbuilds index of the most recent build with a failure.
 #     -1 if there is none.
 #
+#   - failures
+#     The number of builds that had a failure between the first and last failure
+#     builds.
+#
+#   - failruns
+#     The number of test results between the first and last failure builds.
+#     Note that if there are builds for which WineTest was not run this will be
+#     different from last-first+1.
+#
+#   - fixedruns
+#     The number of successful test results following last. Note that if
+#     WineTest was not run (yet?) for some builds this will be different from
+#     @sortedbuilds-last.
+#
 #   - status
 #     A hashtable of test results indexed by the build name.
 my %tests;
@@ -389,6 +403,34 @@ sub fail_type($)
            "random";
 }
 
+sub get_fix_probability($$$)
+{
+    my ($failures, $failruns, $fixedruns) = @_;
+
+    # We want a lower bound on the failure rate so we get a lower bound on the
+    # probability that a failure is fixed.
+    # So ideally we would compute the lowest failure rate that has a
+    # probability greater than some arbitrary value of giving us $failures in
+    # $failruns runs.
+    # - For instance a failure rate of 100% would obviously give us 2 failures
+    #   out of 2 runs. But there is also a 49% chance for a 70% failure rate to
+    #   have the same result. So the lowest failure rate that has more than
+    #   an arbitrary 50% chance of matching this result is ~71%.
+    # - But that's hard to compute in the general case.
+    # - So instead just convert one failure to success and compute the ratio.
+    # - This gives a 0% failure rate for the 1/1 case which is fine because
+    #   this case really does not have enough data to derive a failure rate.
+    # - For the 2/2 case this gives 50% instead of a more likely value like 71%.
+    #   But that's still good enough for our purpose.
+    # - The results continue on the low side up to at least $failruns=10 and
+    #   then the difference is pretty small anyway.
+    my $failrate = ($failures - 1) / $failruns;
+
+    # Then compute the probability of getting $fixedruns successes in a row.
+    # The complement is the probability that the failure has been fixed.
+    return 1 - (1 - $failrate) ** $fixedruns;
+}
+
 foreach my $testname (keys %tests)
 {
     my $test = $tests{$testname};
@@ -404,6 +446,11 @@ foreach my $testname (keys %tests)
         $testreport->{last} = -1;
         # - Type of failure: random or not (missing dll, etc.)
         $testreport->{failtype} = "";
+        # - Statistics to compute the failure rate
+        $testreport->{failures} = 0;
+        $testreport->{failruns} = 0;
+        # - And the number of successful runs after the last failure
+        $testreport->{fixedruns} = 0;
 
         for my $i (0..@sortedbuilds-1)
         {
@@ -415,6 +462,7 @@ foreach my $testname (keys %tests)
                     $build->{hastest}->{$testname})
                 {
                     $testreport->{failtype} ||= 0; # success
+                    $testreport->{fixedruns}++;
                 }
                 # else WineTest was not run for this build
                 next;
@@ -437,12 +485,39 @@ foreach my $testname (keys %tests)
                 $testreport->{first} = $i;
                 $testreport->{last} = $i;
                 $testreport->{failtype} = $failtype;
+                $testreport->{failures} = 1;
+                # Assume the bug was introduced with the first failure and thus
+                # ignore successful runs that preceded it.
+                $testreport->{failruns} = 1;
+                $testreport->{fixedruns} = 0;
             }
             else
             {
                 $testreport->{last} = $i;
+                $testreport->{failures}++;
+                $testreport->{failruns} += $testreport->{fixedruns} + 1;
+                $testreport->{fixedruns} = 0;
             }
         }
+        next if (!$testreport->{failed});
+        next if (!$testreport->{fixedruns});
+
+        if ($testreport->{failtype} eq "random")
+        {
+            # - failruns counts the number of runs from the first to the last
+            #   failure.
+            # - Both failruns and fixedruns account for the builds where the
+            #   test was not run.
+            # - So for instance '...eeeF._F.._' gives failures=2 (not 5),
+            #   failruns=3 (not 4, 9 or 10), and fixedruns=2 (not 3).
+            $testreport->{fixed} = get_fix_probability($testreport->{failures}, $testreport->{failruns}, $testreport->{fixedruns});
+        }
+        else
+        {
+            # Since this failure is not random, even a single success means
+            # it is fixed.
+            $testreport->{fixed} = 1;
+        }
     }
 }
 
@@ -710,11 +785,13 @@ EOF
 
         my $first = @sortedbuilds;
         my $last = -1;
+        my $fixed = 1;
         foreach my $reportdir (keys %$pagereports)
         {
             my $testreport = $test->{testreports}->{$reportdir};
             next if (!$testreport->{failed});
 
+            $fixed = 0 if (($testreport->{fixed} || 0) < $fixed_threshold);
             $first = $testreport->{first} if ($testreport->{first} < $first);
             # For non-random failures we only care about the transition to
             # the failure state, which is recorded in 'first'.
@@ -732,7 +809,7 @@ EOF
         }
         next if ($last == -1); # no report had a pattern of interest
 
-        my $listid = ($last < @sortedbuilds - $patternbuilds) ? "old" :
+        my $listid = ($fixed or $last < @sortedbuilds - $patternbuilds) ? "old" :
                      ($first > $patternbuilds) ? "recent" :
                      "regular";
         push @{$lists{$listid}->{testnames}}, $testname;
diff --git a/winetest/winetest.conf b/winetest/winetest.conf
index f193f8e..1f587b3 100644
--- a/winetest/winetest.conf
+++ b/winetest/winetest.conf
@@ -27,4 +27,7 @@ $maxfilesize = 1.5 * 1024 * 1024;
 # The number of builds after which a failure is considered old / new
 $patternbuilds = 10;
 
+# Probability above which a failure is considered to be fixed (0..1)
+$fixed_threshold = 0.99;
+
 1;                              # keep require happy