From c46d48f9f57a16797773f87bf0aa7a1973165b8c Mon Sep 17 00:00:00 2001
From: Sean Silva
Date: Thu, 28 Oct 2021 18:25:53 +0000
Subject: [PATCH] Make error reporting a bit better.

- Split out TOSA in the CI.
- Add summary of unexpected test outcomes. This works better when there
  are many XFAIL'ing tests, as it only prints out the error_str on FAIL,
  not on XFAIL.

Example here:
https://gist.github.com/silvasean/c7886ec7b3d35c21563cb09f7c3407da
---
 .github/workflows/buildAndTest.yml        |  6 +-
 .../torchscript_e2e_test/error_reports.py |  1 +
 .../torchscript/reporting.py              | 62 ++++++++++++-------
 3 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/buildAndTest.yml b/.github/workflows/buildAndTest.yml
index e8233e94f..615e36a4c 100644
--- a/.github/workflows/buildAndTest.yml
+++ b/.github/workflows/buildAndTest.yml
@@ -58,11 +58,15 @@ jobs:
           -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
           -DLLVM_TARGETS_TO_BUILD=host
         ninja check-torch-mlir-all
-    - name: RefBackend integration tests
+    - name: RefBackend - TorchScript end-to-end tests
       run: |
         cd $GITHUB_WORKSPACE
         export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
         python -m e2e_testing.torchscript.main --config=refbackend -v
+    - name: TOSA backend - TorchScript end-to-end tests
+      run: |
+        cd $GITHUB_WORKSPACE
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=tosa -v
 
     # TODO: Only build packages in full Release mode.
diff --git a/python/test/torchscript_e2e_test/error_reports.py b/python/test/torchscript_e2e_test/error_reports.py
index b0fc5a392..686522644 100644
--- a/python/test/torchscript_e2e_test/error_reports.py
+++ b/python/test/torchscript_e2e_test/error_reports.py
@@ -14,6 +14,7 @@ from torch_mlir_e2e_test.torchscript.reporting import report_results
 from torch_mlir_e2e_test.torchscript.registry import register_test_case, GLOBAL_TEST_REGISTRY
 from torch_mlir_e2e_test.torchscript.configs import TorchScriptTestConfig
 
+# CHECK: Unexpected outcome summary:
 # CHECK: FAIL - "ErroneousModule_basic"
diff --git a/python/torch_mlir_e2e_test/torchscript/reporting.py b/python/torch_mlir_e2e_test/torchscript/reporting.py
index 45f381375..75ec3e781 100644
--- a/python/torch_mlir_e2e_test/torchscript/reporting.py
+++ b/python/torch_mlir_e2e_test/torchscript/reporting.py
@@ -275,42 +275,56 @@ def report_results(results: List[TestResult],
     Returns True if the run resulted in any unexpected pass/fail behavior.
     Otherwise False.
""" - summary = collections.Counter() + results_by_outcome = collections.defaultdict(list) for result in results: report = SingleTestReport(result, ErrorContext.empty()) expected_failure = result.unique_name in expected_failures if expected_failure: if report.failed: - error_str = '' - if verbose: - error_str = '\n' + textwrap.indent(report.error_str(), - ' ') - print(f'XFAIL - "{result.unique_name}"' + error_str) - summary['XFAIL'] += 1 + print(f'XFAIL - "{result.unique_name}"') + results_by_outcome['XFAIL'].append(result) else: print(f'XPASS - "{result.unique_name}"') - summary['XPASS'] += 1 + results_by_outcome['XPASS'].append(result) else: if not report.failed: print(f'PASS - "{result.unique_name}"') - summary['PASS'] += 1 + results_by_outcome['PASS'].append(result) else: - error_str = '' - if verbose: - error_str = '\n' + textwrap.indent(report.error_str(), - ' ') - print(f'FAIL - "{result.unique_name}"' + error_str) - summary['FAIL'] += 1 + print(f'FAIL - "{result.unique_name}"') + results_by_outcome['FAIL'].append(result) + + OUTCOME_MEANINGS = collections.OrderedDict() + OUTCOME_MEANINGS['PASS'] = 'Passed' + OUTCOME_MEANINGS['FAIL'] = 'Failed' + OUTCOME_MEANINGS['XFAIL'] = 'Expectedly Failed' + OUTCOME_MEANINGS['XPASS'] = 'Unexpectedly Passed' + + had_unexpected_results = len(results_by_outcome['FAIL']) != 0 or len( + results_by_outcome['XPASS']) != 0 + + if had_unexpected_results: + print('\nUnexpected outcome summary:') + + # For FAIL and XPASS (unexpected outcomes), print a summary. + for outcome, results in results_by_outcome.items(): + # PASS and XFAIL are "good"/"successful" outcomes. + if outcome == 'PASS' or outcome == 'XFAIL': + continue + # If there is nothing to report, be quiet. + if len(results) == 0: + continue + print(f'\n****** {OUTCOME_MEANINGS[outcome]} tests - {len(results)} tests') + for result in results: + print(f' {outcome} - "{result.unique_name}"') + # If the test failed, print the error message. + if outcome == 'FAIL' and verbose: + print(textwrap.indent(report.error_str(), ' ' * 8)) # Print a summary for easy scanning. print('\nSummary:') - KEY_MEANINGS = { - 'PASS': 'Passed', - 'FAIL': 'Failed', - 'XFAIL': 'Expectedly Failed', - 'XPASS': 'Unexpectedly Passed', - } + for key in ['PASS', 'FAIL', 'XFAIL', 'XPASS']: - if summary[key]: - print(f' {KEY_MEANINGS[key]}: {summary[key]}') - return summary['FAIL'] != 0 or summary['XPASS'] != 0 + if results_by_outcome[key]: + print(f' {OUTCOME_MEANINGS[key]}: {len(results_by_outcome[key])}') + return had_unexpected_results