Make error reporting a bit better.

- Split out TOSA in the CI.
- Add a summary of unexpected test outcomes. This scales better when
  many tests XFAIL, since the error_str is now printed only for FAIL,
  not for XFAIL. Example here:
  https://gist.github.com/silvasean/c7886ec7b3d35c21563cb09f7c3407da
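
  For illustration, the new summary section looks roughly like this
  (hypothetical test name and counts; the exact format is produced by
  report_results in the diff below):

      Unexpected outcome summary:

      ****** Failed tests - 1 tests
          FAIL - "SomeModule_basic"

      Summary:
          Passed: 120
          Failed: 1
          Expectedly Failed: 15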
Refs: pull/391/head, snapshot-20211028.50
Author: Sean Silva
Date: 2021-10-28 18:25:53 +00:00
Parent: b02b65cf6e
Commit: c46d48f9f5
3 changed files with 44 additions and 25 deletions


@@ -58,11 +58,15 @@ jobs:
             -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
             -DLLVM_TARGETS_TO_BUILD=host
           ninja check-torch-mlir-all
-      - name: RefBackend integration tests
+      - name: RefBackend - TorchScript end-to-end tests
         run: |
           cd $GITHUB_WORKSPACE
           export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
           python -m e2e_testing.torchscript.main --config=refbackend -v
+      - name: TOSA backend - TorchScript end-to-end tests
+        run: |
+          cd $GITHUB_WORKSPACE
+          export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+          python -m e2e_testing.torchscript.main --config=tosa -v
       # TODO: Only build packages in full Release mode.
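
Either suite can also be run outside CI with the same commands after a
build, e.g. (with $GITHUB_WORKSPACE standing in for the local checkout
path):

    export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
    python -m e2e_testing.torchscript.main --config=tosa -v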


@@ -14,6 +14,7 @@ from torch_mlir_e2e_test.torchscript.reporting import report_results
 from torch_mlir_e2e_test.torchscript.registry import register_test_case, GLOBAL_TEST_REGISTRY
 from torch_mlir_e2e_test.torchscript.configs import TorchScriptTestConfig
+# CHECK: Unexpected outcome summary:
 # CHECK: FAIL - "ErroneousModule_basic"
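
This is a lit test: FileCheck matches the CHECK lines in order against
the harness output. A test that deliberately fails could be registered
roughly as follows -- a hypothetical sketch, not the actual
ErroneousModule from this file; it leans on torch.jit.is_scripting()
returning False in the eager golden run and True in the scripted run,
so the two results disagree:

    import torch

    from torch_mlir_e2e_test.torchscript.registry import register_test_case

    class ErroneousModule(torch.nn.Module):
        def forward(self):
            # Diverge between the eager-mode golden run and the scripted
            # run, so the value comparison fails and the test reports FAIL.
            if torch.jit.is_scripting():
                return torch.tensor([1.0])
            return torch.tensor([2.0])

    @register_test_case(module_factory=lambda: ErroneousModule())
    def ErroneousModule_basic(module, tu):
        module.forward()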


@@ -275,42 +275,56 @@ def report_results(results: List[TestResult],
     Returns True if the run resulted in any unexpected pass/fail behavior.
     Otherwise False.
     """
-    summary = collections.Counter()
+    results_by_outcome = collections.defaultdict(list)
     for result in results:
         report = SingleTestReport(result, ErrorContext.empty())
         expected_failure = result.unique_name in expected_failures
         if expected_failure:
             if report.failed:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'XFAIL - "{result.unique_name}"' + error_str)
-                summary['XFAIL'] += 1
+                print(f'XFAIL - "{result.unique_name}"')
+                results_by_outcome['XFAIL'].append((result, report))
             else:
                 print(f'XPASS - "{result.unique_name}"')
-                summary['XPASS'] += 1
+                results_by_outcome['XPASS'].append((result, report))
         else:
             if not report.failed:
                 print(f'PASS - "{result.unique_name}"')
-                summary['PASS'] += 1
+                results_by_outcome['PASS'].append((result, report))
             else:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'FAIL - "{result.unique_name}"' + error_str)
-                summary['FAIL'] += 1
+                print(f'FAIL - "{result.unique_name}"')
+                results_by_outcome['FAIL'].append((result, report))
+
+    OUTCOME_MEANINGS = collections.OrderedDict()
+    OUTCOME_MEANINGS['PASS'] = 'Passed'
+    OUTCOME_MEANINGS['FAIL'] = 'Failed'
+    OUTCOME_MEANINGS['XFAIL'] = 'Expectedly Failed'
+    OUTCOME_MEANINGS['XPASS'] = 'Unexpectedly Passed'
+
+    had_unexpected_results = len(results_by_outcome['FAIL']) != 0 or len(
+        results_by_outcome['XPASS']) != 0
+    if had_unexpected_results:
+        print('\nUnexpected outcome summary:')
+
+    # For FAIL and XPASS (unexpected outcomes), print a summary.
+    for outcome, outcome_results in results_by_outcome.items():
+        # PASS and XFAIL are "good"/"successful" outcomes.
+        if outcome == 'PASS' or outcome == 'XFAIL':
+            continue
+        # If there is nothing to report, be quiet.
+        if len(outcome_results) == 0:
+            continue
+        print(f'\n****** {OUTCOME_MEANINGS[outcome]} tests - {len(outcome_results)} tests')
+        for result, report in outcome_results:
+            print(f'    {outcome} - "{result.unique_name}"')
+            # If the test failed, print the error message.
+            if outcome == 'FAIL' and verbose:
+                print(textwrap.indent(report.error_str(), ' ' * 8))

     # Print a summary for easy scanning.
     print('\nSummary:')
-    KEY_MEANINGS = {
-        'PASS': 'Passed',
-        'FAIL': 'Failed',
-        'XFAIL': 'Expectedly Failed',
-        'XPASS': 'Unexpectedly Passed',
-    }
     for key in ['PASS', 'FAIL', 'XFAIL', 'XPASS']:
-        if summary[key]:
-            print(f'    {KEY_MEANINGS[key]}: {summary[key]}')
-    return summary['FAIL'] != 0 or summary['XPASS'] != 0
+        if results_by_outcome[key]:
+            print(f'    {OUTCOME_MEANINGS[key]}: {len(results_by_outcome[key])}')
+    return had_unexpected_results
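
To see the data-structure change in isolation: the Counter is gone
because the buckets themselves carry the counts, and keeping the
(result, report) pairs around lets the detail section re-print error
messages after the per-test lines. A minimal standalone sketch
(hypothetical data, not torch-mlir code):

    import collections

    # Bucket (name, details) pairs by outcome; counts are just bucket sizes.
    results_by_outcome = collections.defaultdict(list)
    for name, failed, expected_failure in [('a', True, False),
                                           ('b', False, False),
                                           ('c', True, True)]:
        if expected_failure:
            outcome = 'XFAIL' if failed else 'XPASS'
        else:
            outcome = 'FAIL' if failed else 'PASS'
        results_by_outcome[outcome].append((name, f'details for {name}'))

    # One structure serves both the detail section and the final counts.
    for outcome in ['PASS', 'FAIL', 'XFAIL', 'XPASS']:
        if results_by_outcome[outcome]:
            print(f'{outcome}: {len(results_by_outcome[outcome])}')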