This LLVM bump triggered an upstream assert. Includes a WAR (workaround) for #3506.

Also includes several things I needed to do to repro:

* When TORCH_MLIR_TEST_CONCURRENCY=1, each test's name is printed as it runs.
* Added TORCH_MLIR_TEST_VERBOSE=1 handling to enable verbose mode
(useful on CI).
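
For example, a typical sequential repro invocation (the test filter is
illustrative):

```
export TORCH_MLIR_TEST_CONCURRENCY=1
export TORCH_MLIR_TEST_VERBOSE=1
python -m e2e_testing.main -f 'AtenEmbeddingBag'
```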

---------

Co-authored-by: Stella Laurenzo <stellaraccident@gmail.com>
pull/3508/head
Aart Bik 2024-06-27 19:28:02 -07:00 committed by GitHub
parent 6d0ca499e6
commit 1f73895f93
7 changed files with 46 additions and 10 deletions


@@ -429,6 +429,20 @@ cd projects/pt1
 python -m e2e_testing.main -f 'AtenEmbeddingBag'
 ```
+
+The default mode of running tests uses the multi-processing framework and is
+not tolerant of certain types of errors. If you encounter native crashes or
+hangs, enable the debug variables to run sequentially/in-process with more verbosity:
+```
+export TORCH_MLIR_TEST_CONCURRENCY=1
+export TORCH_MLIR_TEST_VERBOSE=1
+```
+
+In this way, you can run under `gdb`, etc., and get useful results. Env vars
+like these are also easy to set in GH action files. Note that the verbose
+flags are very verbose; basic sequential progress reports are printed
+regardless when not running in parallel.
+
 ## Running unit tests.
 To run all of the unit tests, run:
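With those variables set, an in-process debugger session might look like the
following sketch (the filter and `gdb` flags are illustrative):

```
export TORCH_MLIR_TEST_CONCURRENCY=1
export TORCH_MLIR_TEST_VERBOSE=1
gdb -ex run --args python -m e2e_testing.main -f 'AtenEmbeddingBag'
```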

@@ -1 +1 @@
-Subproject commit 5207632f8698a2fab0c4cdcdf2f7ad9aaf96e06f
+Subproject commit 9b78ddf3b2abfb3e2063e3dad2a326f5eabc1618


@@ -46,16 +46,17 @@ using namespace mlir::torch::TMTensor;
 static void getEffectsImpl(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects,
-    ValueRange results, ValueRange inputBuffers, ValueRange outputBuffers) {
-  for (Value value : results) {
+    ResultRange results, ArrayRef<OpOperand *> inputBuffers,
+    ArrayRef<OpOperand *> outputBuffers) {
+  for (OpResult value : results) {
     effects.emplace_back(MemoryEffects::Allocate::get(), value,
                          SideEffects::DefaultResource::get());
   }
-  for (Value value : inputBuffers) {
+  for (OpOperand *value : inputBuffers) {
     effects.emplace_back(MemoryEffects::Read::get(), value,
                          SideEffects::DefaultResource::get());
   }
-  for (Value value : outputBuffers) {
+  for (OpOperand *value : outputBuffers) {
     effects.emplace_back(MemoryEffects::Read::get(), value,
                          SideEffects::DefaultResource::get());
     effects.emplace_back(MemoryEffects::Write::get(), value,
@@ -1121,8 +1122,8 @@ bool TopkOp::payloadUsesValueFromOperand(OpOperand *opOperand) {
   void OP_NAME::getEffects(                                                    \
       SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>      \
           &effects) {                                                          \
-    SmallVector<Value> inputBuffers = getInputBufferOperands();                \
-    SmallVector<Value> outputBuffers = getOutputBufferOperands();              \
+    OpOperandVector inputBuffers = getInputBufferOperands();                   \
+    OpOperandVector outputBuffers = getOutputBufferOperands();                 \
     getEffectsImpl(effects, getOperation()->getResults(), inputBuffers,        \
                    outputBuffers);                                             \
   }


@@ -2810,7 +2810,8 @@ LogicalResult CopyToNonValueTensorOp::inferReturnTypes(
 void CopyToNonValueTensorOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Allocate::get(), getResult());
+  effects.emplace_back(MemoryEffects::Allocate::get(),
+                       getOperation()->getOpResult(0));
 }
 
 //===----------------------------------------------------------------------===//
@@ -2837,7 +2838,8 @@ LogicalResult CopyToValueTensorOp::inferReturnTypes(
 void CopyToValueTensorOp::getEffects(
     SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
         &effects) {
-  effects.emplace_back(MemoryEffects::Read::get(), getOperand());
+  effects.emplace_back(MemoryEffects::Read::get(),
+                       &getOperation()->getOpOperand(0));
 }
 
 //===----------------------------------------------------------------------===//
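Both C++ changes follow the same upstream MLIR API shift: memory effects are
now attached to an `OpOperand *` or an `OpResult` rather than to a plain
`Value`. A minimal sketch of the pattern for a hypothetical single-operand,
single-result op (`MyOp` is illustrative, not an op in this repo):

```
// Sketch only: assumes the post-bump MLIR SideEffects API, where an
// EffectInstance is keyed on an OpOperand* or OpResult.
void MyOp::getEffects(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects) {
  // The read is attached to the operand slot, not the SSA value.
  effects.emplace_back(MemoryEffects::Read::get(),
                       &getOperation()->getOpOperand(0),
                       SideEffects::DefaultResource::get());
  // The buffer backing the single result is allocated by this op.
  effects.emplace_back(MemoryEffects::Allocate::get(),
                       getOperation()->getOpResult(0),
                       SideEffects::DefaultResource::get());
}
```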


@@ -7,6 +7,10 @@ import argparse
 import re
 import sys
 
+import torch
+
+torch.device("cpu")
+
 from torch_mlir_e2e_test.framework import run_tests
 from torch_mlir_e2e_test.reporting import report_results
 from torch_mlir_e2e_test.registry import GLOBAL_TEST_REGISTRY


@@ -358,6 +358,15 @@ def run_tests(
     if env_concurrency > 0:
         num_processes = min(num_processes, env_concurrency)
 
+    try:
+        env_verbose = os.getenv("TORCH_MLIR_TEST_VERBOSE", "0")
+        if env_verbose is not None:
+            verbose = bool(int(env_verbose))
+    except ValueError as e:
+        raise ValueError(
+            "Bad value for TORCH_MLIR_TEST_VERBOSE env var: Expected integer."
+        ) from e
+
     # TODO: We've noticed that on certain 2 core machine parallelizing the tests
     # makes the llvm backend legacy pass manager 20x slower than using a
     # single process. Need to investigate the root cause eventually. This is a
@@ -375,7 +384,10 @@ def run_tests(
     # seems to cause a cascade of failures resulting in undecipherable error
     # messages.
     if num_processes == 1 or sequential:
-        return [compile_and_run_test(test, config, verbose) for test in tests]
+        print("Running tests sequentially with progress status")
+        for test in tests:
+            print(f"*** RUNNING TEST: {test.unique_name} ***")
+            compile_and_run_test(test, config, verbose)
 
     # This is needed because autograd does not support crossing process
     # boundaries.
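The parse-an-integer-env-var pattern above mirrors the existing
TORCH_MLIR_TEST_CONCURRENCY handling earlier in `run_tests`; if this grows
further, a small helper could factor it out. A minimal sketch, standard
library only (`env_int` is a hypothetical name, not part of this change):

```
import os


def env_int(name: str, default: int = 0) -> int:
    """Parse an integer-valued environment variable with a clear error."""
    raw = os.getenv(name, str(default))
    try:
        return int(raw)
    except ValueError as e:
        raise ValueError(f"Bad value for {name} env var: Expected integer.") from e


# Usage mirroring the handling in run_tests:
verbose = bool(env_int("TORCH_MLIR_TEST_VERBOSE"))
env_concurrency = env_int("TORCH_MLIR_TEST_CONCURRENCY")
```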


@@ -40,6 +40,9 @@ def run_pipeline_with_repro_report(
     )
     # Lower module in place to make it ready for compiler backends.
     with module.context as ctx:
+        # TODO(#3506): Passes can emit errors but not signal failure,
+        # which causes a native assert.
+        ctx.emit_error_diagnostics = True
         pm = PassManager.parse(pipeline)
         if enable_ir_printing:
             ctx.enable_multithreading(False)
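For context on the workaround, a standalone sketch of what
`emit_error_diagnostics` changes, assuming the upstream MLIR Python bindings
as re-exported by `torch_mlir` (the pipeline and IR here are illustrative):

```
from torch_mlir.ir import Context, Module
from torch_mlir.passmanager import PassManager

with Context() as ctx:
    # Route errors emitted by passes to diagnostic handlers instead of having
    # the bindings capture them, sidestepping the native assert described in
    # #3506 when a pass emits an error without signaling failure.
    ctx.emit_error_diagnostics = True

    def handler(diag):
        print(f"diagnostic: {diag.message}")
        return True  # mark the diagnostic as handled

    # Keep a reference so the handler stays attached for the context's life.
    attached = ctx.attach_diagnostic_handler(handler)
    module = Module.parse("module {}")
    PassManager.parse("builtin.module(canonicalize)").run(module.operation)
```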