Make error reporting a bit better.

- Split out the TOSA tests into their own CI step.
- Add a summary of unexpected test outcomes. This works better when
  there are many XFAIL'ing tests, since the error_str is now printed
  only on FAIL, not on XFAIL. Example here:
  https://gist.github.com/silvasean/c7886ec7b3d35c21563cb09f7c3407da
Refs: pull/391/head, snapshot-20211028.50
Sean Silva, 2021-10-28 18:25:53 +00:00
parent b02b65cf6e, commit c46d48f9f5
3 changed files with 44 additions and 25 deletions


@@ -58,11 +58,15 @@ jobs:
           -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
           -DLLVM_TARGETS_TO_BUILD=host
         ninja check-torch-mlir-all
-    - name: RefBackend integration tests
+    - name: RefBackend - TorchScript end-to-end tests
       run: |
         cd $GITHUB_WORKSPACE
         export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
         python -m e2e_testing.torchscript.main --config=refbackend -v
+    - name: TOSA backend - TorchScript end-to-end tests
+      run: |
+        cd $GITHUB_WORKSPACE
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=tosa -v
     # TODO: Only build packages in full Release mode.
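The new TOSA step can be reproduced locally with essentially the same
commands (a sketch, assuming the CMake build tree lives at build/ under
the checkout root, as in this workflow):

    # From the torch-mlir checkout root (stand-in for $GITHUB_WORKSPACE).
    export PYTHONPATH="$PWD/build/tools/torch-mlir/python_packages/torch_mlir"
    python -m e2e_testing.torchscript.main --config=tosa -v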


@@ -14,6 +14,7 @@ from torch_mlir_e2e_test.torchscript.reporting import report_results
 from torch_mlir_e2e_test.torchscript.registry import register_test_case, GLOBAL_TEST_REGISTRY
 from torch_mlir_e2e_test.torchscript.configs import TorchScriptTestConfig
 
+# CHECK: Unexpected outcome summary:
 # CHECK: FAIL - "ErroneousModule_basic"


@@ -275,42 +275,56 @@ def report_results(results: List[TestResult],
     Returns True if the run resulted in any unexpected pass/fail behavior.
     Otherwise False.
     """
-    summary = collections.Counter()
+    results_by_outcome = collections.defaultdict(list)
     for result in results:
         report = SingleTestReport(result, ErrorContext.empty())
         expected_failure = result.unique_name in expected_failures
         if expected_failure:
             if report.failed:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'XFAIL - "{result.unique_name}"' + error_str)
-                summary['XFAIL'] += 1
+                print(f'XFAIL - "{result.unique_name}"')
+                results_by_outcome['XFAIL'].append((result, report))
             else:
                 print(f'XPASS - "{result.unique_name}"')
-                summary['XPASS'] += 1
+                results_by_outcome['XPASS'].append((result, report))
         else:
             if not report.failed:
                 print(f'PASS - "{result.unique_name}"')
-                summary['PASS'] += 1
+                results_by_outcome['PASS'].append((result, report))
             else:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'FAIL - "{result.unique_name}"' + error_str)
-                summary['FAIL'] += 1
+                print(f'FAIL - "{result.unique_name}"')
+                results_by_outcome['FAIL'].append((result, report))
+
+    OUTCOME_MEANINGS = collections.OrderedDict()
+    OUTCOME_MEANINGS['PASS'] = 'Passed'
+    OUTCOME_MEANINGS['FAIL'] = 'Failed'
+    OUTCOME_MEANINGS['XFAIL'] = 'Expectedly Failed'
+    OUTCOME_MEANINGS['XPASS'] = 'Unexpectedly Passed'
+
+    had_unexpected_results = len(results_by_outcome['FAIL']) != 0 or len(
+        results_by_outcome['XPASS']) != 0
+    if had_unexpected_results:
+        print('\nUnexpected outcome summary:')
+
+    # For FAIL and XPASS (unexpected outcomes), print a summary.
+    for outcome, outcome_results in results_by_outcome.items():
+        # PASS and XFAIL are "good"/"successful" outcomes.
+        if outcome == 'PASS' or outcome == 'XFAIL':
+            continue
+        # If there is nothing to report, be quiet.
+        if len(outcome_results) == 0:
+            continue
+        print(f'\n****** {OUTCOME_MEANINGS[outcome]} tests - {len(outcome_results)} tests')
+        # Each result is stored paired with its report, so the error message
+        # printed here belongs to this result rather than the last one seen
+        # in the loop above.
+        for result, report in outcome_results:
+            print(f'    {outcome} - "{result.unique_name}"')
+            # If the test failed, print the error message.
+            if outcome == 'FAIL' and verbose:
+                print(textwrap.indent(report.error_str(), ' ' * 8))
 
     # Print a summary for easy scanning.
     print('\nSummary:')
-    KEY_MEANINGS = {
-        'PASS': 'Passed',
-        'FAIL': 'Failed',
-        'XFAIL': 'Expectedly Failed',
-        'XPASS': 'Unexpectedly Passed',
-    }
     for key in ['PASS', 'FAIL', 'XFAIL', 'XPASS']:
-        if summary[key]:
-            print(f'  {KEY_MEANINGS[key]}: {summary[key]}')
-    return summary['FAIL'] != 0 or summary['XPASS'] != 0
+        if results_by_outcome[key]:
+            print(f'  {OUTCOME_MEANINGS[key]}: {len(results_by_outcome[key])}')
+    return had_unexpected_results
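
Given the print calls above, a run with one unexpectedly failing test
under -v produces output of roughly this shape (a sketch; the test name
is borrowed from the lit test above, and real output is in the linked
gist):

    FAIL - "ErroneousModule_basic"

    Unexpected outcome summary:

    ****** Failed tests - 1 tests
        FAIL - "ErroneousModule_basic"
            <error_str for this test, indented 8 spaces>

    Summary:
      Failed: 1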