Make error reporting a bit better.

- Split out the TOSA tests into their own CI step.
- Add a summary of unexpected test outcomes. This works better when
  there are many XFAIL'ing tests, since the error_str is now printed
  only on FAIL, not on XFAIL. Example here:
  https://gist.github.com/silvasean/c7886ec7b3d35c21563cb09f7c3407da
Refs: pull/391/head, snapshot-20211028.50
Sean Silva, 2021-10-28 18:25:53 +00:00
parent b02b65cf6e, commit c46d48f9f5
3 changed files with 44 additions and 25 deletions


@@ -58,11 +58,15 @@ jobs:
           -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
           -DLLVM_TARGETS_TO_BUILD=host
         ninja check-torch-mlir-all
-    - name: RefBackend integration tests
+    - name: RefBackend - TorchScript end-to-end tests
       run: |
         cd $GITHUB_WORKSPACE
         export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
         python -m e2e_testing.torchscript.main --config=refbackend -v
+    - name: TOSA backend - TorchScript end-to-end tests
+      run: |
+        cd $GITHUB_WORKSPACE
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=tosa -v
     # TODO: Only build packages in full Release mode.
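The new TOSA step can be reproduced locally with essentially the same
commands (a sketch, assuming the CMake build tree lives at build/ under
the checkout root, as in this workflow):

    # From the torch-mlir checkout root (stand-in for $GITHUB_WORKSPACE).
    export PYTHONPATH="$PWD/build/tools/torch-mlir/python_packages/torch_mlir"
    python -m e2e_testing.torchscript.main --config=tosa -v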


@@ -14,6 +14,7 @@ from torch_mlir_e2e_test.torchscript.reporting import report_results
 from torch_mlir_e2e_test.torchscript.registry import register_test_case, GLOBAL_TEST_REGISTRY
 from torch_mlir_e2e_test.torchscript.configs import TorchScriptTestConfig
 
+# CHECK: Unexpected outcome summary:
 # CHECK: FAIL - "ErroneousModule_basic"


@@ -275,42 +275,56 @@ def report_results(results: List[TestResult],
     Returns True if the run resulted in any unexpected pass/fail behavior.
     Otherwise False.
     """
-    summary = collections.Counter()
+    results_by_outcome = collections.defaultdict(list)
     for result in results:
         report = SingleTestReport(result, ErrorContext.empty())
         expected_failure = result.unique_name in expected_failures
         if expected_failure:
             if report.failed:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'XFAIL - "{result.unique_name}"' + error_str)
-                summary['XFAIL'] += 1
+                print(f'XFAIL - "{result.unique_name}"')
+                results_by_outcome['XFAIL'].append((result, report))
             else:
                 print(f'XPASS - "{result.unique_name}"')
-                summary['XPASS'] += 1
+                results_by_outcome['XPASS'].append((result, report))
         else:
             if not report.failed:
                 print(f'PASS - "{result.unique_name}"')
-                summary['PASS'] += 1
+                results_by_outcome['PASS'].append((result, report))
             else:
-                error_str = ''
-                if verbose:
-                    error_str = '\n' + textwrap.indent(report.error_str(),
-                                                       '    ')
-                print(f'FAIL - "{result.unique_name}"' + error_str)
-                summary['FAIL'] += 1
+                print(f'FAIL - "{result.unique_name}"')
+                results_by_outcome['FAIL'].append((result, report))
+
+    OUTCOME_MEANINGS = collections.OrderedDict()
+    OUTCOME_MEANINGS['PASS'] = 'Passed'
+    OUTCOME_MEANINGS['FAIL'] = 'Failed'
+    OUTCOME_MEANINGS['XFAIL'] = 'Expectedly Failed'
+    OUTCOME_MEANINGS['XPASS'] = 'Unexpectedly Passed'
+
+    had_unexpected_results = len(results_by_outcome['FAIL']) != 0 or len(
+        results_by_outcome['XPASS']) != 0
+    if had_unexpected_results:
+        print('\nUnexpected outcome summary:')
+
+    # For FAIL and XPASS (unexpected outcomes), print a summary.
+    for outcome, outcome_results in results_by_outcome.items():
+        # PASS and XFAIL are "good"/"successful" outcomes.
+        if outcome == 'PASS' or outcome == 'XFAIL':
+            continue
+        # If there is nothing to report, be quiet.
+        if len(outcome_results) == 0:
+            continue
+        print(f'\n****** {OUTCOME_MEANINGS[outcome]} tests - {len(outcome_results)} tests')
+        # Each result is stored paired with its report, so the error message
+        # printed here belongs to this result rather than the last one seen
+        # in the loop above.
+        for result, report in outcome_results:
+            print(f'    {outcome} - "{result.unique_name}"')
+            # If the test failed, print the error message.
+            if outcome == 'FAIL' and verbose:
+                print(textwrap.indent(report.error_str(), ' ' * 8))
 
     # Print a summary for easy scanning.
     print('\nSummary:')
-    KEY_MEANINGS = {
-        'PASS': 'Passed',
-        'FAIL': 'Failed',
-        'XFAIL': 'Expectedly Failed',
-        'XPASS': 'Unexpectedly Passed',
-    }
     for key in ['PASS', 'FAIL', 'XFAIL', 'XPASS']:
-        if summary[key]:
-            print(f'  {KEY_MEANINGS[key]}: {summary[key]}')
-    return summary['FAIL'] != 0 or summary['XPASS'] != 0
+        if results_by_outcome[key]:
+            print(f'  {OUTCOME_MEANINGS[key]}: {len(results_by_outcome[key])}')
+    return had_unexpected_results
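
Given the print calls above, a run with one unexpectedly failing test
under -v produces output of roughly this shape (a sketch; the test name
is borrowed from the lit test above, and real output is in the linked
gist):

    FAIL - "ErroneousModule_basic"

    Unexpected outcome summary:

    ****** Failed tests - 1 tests
        FAIL - "ErroneousModule_basic"
            <error_str for this test, indented 8 spaces>

    Summary:
      Failed: 1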