RFC: Allow marking unreliable tests as flaky.

Wed Jun 1 08:26:52 CDT 2022

Wine has many unreliable tests which makes it hard to know whether a 
patch causes new failures. So one proposal was to mark unreliable tests 
as flaky, and have a mode where flaky failures can ignored by setting 
$WINETEST_ALLOW_FLAKY.

So one would be able to apply a patch, run 'WINETEST_ALLOW_FLAKY=1 make 
test' and if that fails that means the patch introduces a new failure.

The way it works is that when a flaky test fails the message is 'Flaky 
failed...' instead of 'Test failed...'. I also added the count of flaky 
failures to the summary line. So the flaky failures are not totally 
swept under the rug. The main difference is that if 
$WINETEST_ALLOW_FLAKY is set, the exit code is 0 if the only failures 
are flaky ones.


That still leaves some open questions though:

* Should the message be 'Test failed' instead of 'Flaky failed' when 
  $WINETEST_ALLOW_FLAKY is not set? I opted for the latter because it 
  adds information that the failed test has been marked as flaky.

* Should tests that fail systematically on some platforms but not others 
  (for instance specific Windows versions, specific locales, specific 
  GPUs) be marked as flaky? Or should flaky be reserved for tests that 
  fail randomly?

* What about failures where the message is different every time so 
  that the failure always looks new? Should these not be marked as flaky 
  since it only impacts the TestBot?

* On a related note the patch below has flaky_windows and flaky_wine 
  macros. Is that too fine grained? Maybe we don't want to run the risk 
  of marking a test as flaky_windows only to discover later that it can 
  also fail in Wine? But conversely, wouldn't it be valuable to know 
  that a test is only flaky in Wine and not in Windows?

  Note: flaky_windows is a bit long but I was worried that flaky_win 
        would be too similar to flaky_wine.

* I decided having flaky_{windows,wine}_if() macros would be overkill.

* How does one know a flaky directive can be removed?

* Is it allowed to submit a patch that contains new flaky directives?

* Also the flaky directive cannot deal with tests that randomly crash, 
  prematurely exit, or time out. These issues are rarer so maybe we can 
  hope they can all be fixed?

* How does this impact bug 48912, aka "Allow blacklisting unreliable 
  and always new failures"?
  https://bugs.winehq.org/show_bug.cgi?id=48912

* test.winehq.org and the TestBot will need to be updated to highlight
  the new messages and, more importantly, to recognize the new summary 
  line.


The patch is below and I bundled it with tweaks to a few tests for 
illustration purposes:
d3d8:device - Failure happens on Windows (rarely) and Wine.
dinput:mouse - Failure happens on Windows 7 and Wine.
mmdevapi:render - Timing issue.
schedsvc:atsvcapi - Wine-only failure.
---
 dlls/d3d8/tests/device.c       |  2 +-
 dlls/dinput/tests/mouse.c      |  2 +-
 dlls/mmdevapi/tests/render.c   |  2 +-
 dlls/schedsvc/tests/atsvcapi.c |  2 +-
 include/wine/test.h            | 78 +++++++++++++++++++++++++++++-----
 5 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/dlls/d3d8/tests/device.c b/dlls/d3d8/tests/device.c
index cb1e83b64b3..f0e8c27fafd 100644
--- a/dlls/d3d8/tests/device.c
+++ b/dlls/d3d8/tests/device.c
@@ -3356,7 +3356,7 @@ static void test_wndproc(void)
 
     expect_messages = sc_maximize_messages;
     SendMessageA(focus_window, WM_SYSCOMMAND, SC_MAXIMIZE, 0);
-    ok(!expect_messages->message, "Expected message %#x for window %#x, but didn't receive it.\n",
+    flaky ok(!expect_messages->message, "Expected message %#x for window %#x, but didn't receive it.\n",
             expect_messages->message, expect_messages->window);
     expect_messages = NULL;
     flush_events();
diff --git a/dlls/dinput/tests/mouse.c b/dlls/dinput/tests/mouse.c
index 8be808e5da6..dcb5453a825 100644
--- a/dlls/dinput/tests/mouse.c
+++ b/dlls/dinput/tests/mouse.c
@@ -156,7 +156,7 @@ static void test_acquire(IDirectInputA *pDI, HWND hwnd)
 
     SetActiveWindow( hwnd );
     hr = IDirectInputDevice_Acquire(pMouse);
-    ok(hr == S_OK, "Acquire() failed: %#lx\n", hr);
+    flaky ok(hr == S_OK, "Acquire() failed: %#lx\n", hr);
 
     mouse_event(MOUSEEVENTF_MOVE, 10, 10, 0, 0);
     cnt = 1;
diff --git a/dlls/mmdevapi/tests/render.c b/dlls/mmdevapi/tests/render.c
index 4e19b91c5f5..882be89b7ff 100644
--- a/dlls/mmdevapi/tests/render.c
+++ b/dlls/mmdevapi/tests/render.c
@@ -1098,7 +1098,7 @@ static void test_clock(int share)
     ok(pos == 0, "GetPosition returned non-zero pos before being started\n");
 
     hr = IAudioClient_Start(ac); /* #1 */
-    ok(hr == S_OK, "Start failed: %08lx\n", hr);
+    flaky ok(hr == S_OK, "Start failed: %08lx\n", hr);
 
     Sleep(100);
     slept += 100;
diff --git a/dlls/schedsvc/tests/atsvcapi.c b/dlls/schedsvc/tests/atsvcapi.c
index 3aaad8b5cfe..83774804b40 100644
--- a/dlls/schedsvc/tests/atsvcapi.c
+++ b/dlls/schedsvc/tests/atsvcapi.c
@@ -168,7 +168,7 @@ START_TEST(atsvcapi)
 
 skip_tests_delete:
     ret = NetrJobDel(server_name, jobid, jobid);
-    ok(ret == ERROR_SUCCESS, "NetrJobDel error %lu\n", ret);
+    flaky_wine ok(ret == ERROR_SUCCESS, "NetrJobDel error %lu\n", ret);
 
 skip_tests:
     SetUnhandledExceptionFilter(old_exception_filter);
diff --git a/include/wine/test.h b/include/wine/test.h
index 01ba81f4857..422ba3dce38 100644
--- a/include/wine/test.h
+++ b/include/wine/test.h
@@ -50,6 +50,9 @@ extern int winetest_time;
 /* running in interactive mode? */
 extern int winetest_interactive;
 
+/* always count flaky tests as successful (BOOL) */
+extern int winetest_allow_flaky;
+
 /* report successful tests (BOOL) */
 extern int winetest_report_success;
 
@@ -117,6 +120,13 @@ extern void winetest_pop_context(void);
 #define trace    trace_(__FILE__, __LINE__)
 #define wait_child_process wait_child_process_(__FILE__, __LINE__)
 
+#define flaky_if(is_flaky) for (winetest_start_flaky(is_flaky); \
+                                winetest_loop_flaky(); \
+                                winetest_end_flaky())
+#define flaky         flaky_if(1)
+#define flaky_windows flaky_if(!strcmp(winetest_platform, "windows"))
+#define flaky_wine    flaky_if(!strcmp(winetest_platform, "wine"))
+
 #define todo_if(is_todo) for (winetest_start_todo(is_todo); \
                               winetest_loop_todo(); \
                               winetest_end_todo())
@@ -200,6 +210,9 @@ int winetest_interactive = 0;
 /* current platform */
 const char *winetest_platform = "windows";
 
+/* always count flaky tests as successful (BOOL) */
+int winetest_allow_flaky;
+
 /* report successful tests (BOOL) */
 int winetest_report_success = 0;
 
@@ -214,6 +227,7 @@ static const struct test *current_test; /* test currently being run */
 
 static LONG successes;       /* number of successful tests */
 static LONG failures;        /* number of failures */
+static LONG flaky_failures;  /* number of failures inside flaky block */
 static LONG skipped;         /* number of skipped test chunks */
 static LONG todo_successes;  /* number of successful tests inside todo block */
 static LONG todo_failures;   /* number of failures inside todo block */
@@ -229,6 +243,8 @@ struct tls_data
 {
     const char* current_file;        /* file of current check */
     int current_line;                /* line of current check */
+    unsigned int flaky_level;        /* current flaky nesting level */
+    int flaky_do_loop;
     unsigned int todo_level;         /* current todo nesting level */
     int todo_do_loop;
     char *str_pos;                   /* position in debug buffer */
@@ -355,9 +371,18 @@ int winetest_vok( int condition, const char *msg, va_list args )
     {
         if (condition)
         {
-            winetest_print_context( "Test succeeded inside todo block: " );
-            vprintf(msg, args);
-            InterlockedIncrement(&todo_failures);
+            if (data->flaky_level)
+            {
+                winetest_print_context( "Flaky succeeded inside todo block: " );
+                vprintf(msg, args);
+                InterlockedIncrement(&flaky_failures);
+            }
+            else
+            {
+                winetest_print_context( "Test succeeded inside todo block: " );
+                vprintf(msg, args);
+                InterlockedIncrement(&todo_failures);
+            }
             return 0;
         }
         else
@@ -381,9 +406,18 @@ int winetest_vok( int condition, const char *msg, va_list args )
     {
         if (!condition)
         {
-            winetest_print_context( "Test failed: " );
-            vprintf(msg, args);
-            InterlockedIncrement(&failures);
+            if (data->flaky_level)
+            {
+                winetest_print_context( "Flaky failed: " );
+                vprintf(msg, args);
+                InterlockedIncrement(&flaky_failures);
+            }
+            else
+            {
+                winetest_print_context( "Test failed: " );
+                vprintf(msg, args);
+                InterlockedIncrement(&failures);
+            }
             return 0;
         }
         else
@@ -456,6 +490,27 @@ void winetest_win_skip( const char *msg, ... )
     va_end(valist);
 }
 
+void winetest_start_flaky( int is_flaky )
+{
+    struct tls_data *data = get_tls_data();
+    data->flaky_level = (data->flaky_level << 1) | (is_flaky != 0);
+    data->flaky_do_loop=1;
+}
+
+int winetest_loop_flaky(void)
+{
+    struct tls_data *data = get_tls_data();
+    int do_flaky=data->flaky_do_loop;
+    data->flaky_do_loop=0;
+    return do_flaky;
+}
+
+void winetest_end_flaky(void)
+{
+    struct tls_data *data = get_tls_data();
+    data->flaky_level >>= 1;
+}
+
 void winetest_start_todo( int is_todo )
 {
     struct tls_data *data = get_tls_data();
@@ -601,14 +656,16 @@ static int run_test( const char *name )
             printf( "%04x:%s:%s Silenced %d todos, %d skips and %d traces.\n",
                     (UINT)GetCurrentProcessId(), test->name, winetest_elapsed(),
                     (UINT)muted_todo_successes, (UINT)muted_skipped, (UINT)muted_traces);
-        printf( "%04x:%s:%s %d tests executed (%d marked as todo, %d %s), %d skipped.\n",
+        printf( "%04x:%s:%s %d tests executed (%d marked as todo, %d as flaky, %d %s), %d skipped.\n",
                 (UINT)GetCurrentProcessId(), test->name, winetest_elapsed(),
-                (UINT)(successes + failures + todo_successes + todo_failures),
-                (UINT)todo_successes, (UINT)(failures + todo_failures),
+                (UINT)(successes + failures + flaky_failures + todo_successes + todo_failures),
+                (UINT)todo_successes, (UINT)flaky_failures, (UINT)(failures + todo_failures),
                 (failures + todo_failures != 1) ? "failures" : "failure",
                 (UINT)skipped );
     }
-    status = (failures + todo_failures < 255) ? failures + todo_failures : 255;
+    status = failures + todo_failures;
+    if (!winetest_allow_flaky) status += flaky_failures;
+    if (status > 255) status = 255;
     return status;
 }
 
@@ -665,6 +722,7 @@ int main( int argc, char **argv )
 
     if (GetEnvironmentVariableA( "WINETEST_DEBUG", p, sizeof(p) )) winetest_debug = atoi(p);
     if (GetEnvironmentVariableA( "WINETEST_INTERACTIVE", p, sizeof(p) )) winetest_interactive = atoi(p);
+    if (GetEnvironmentVariableA( "WINETEST_ALLOW_FLAKY", p, sizeof(p) )) winetest_allow_flaky = atoi(p);
     if (GetEnvironmentVariableA( "WINETEST_REPORT_SUCCESS", p, sizeof(p) )) winetest_report_success = atoi(p);
     if (GetEnvironmentVariableA( "WINETEST_TIME", p, sizeof(p) )) winetest_time = atoi(p);
     winetest_last_time = winetest_start_time = GetTickCount();
-- 
2.30.2