Add Stable PyTorch CI Pipeline (#2038)

* feat: split pytorch requirements into stable and nightly * fix: add true to tests to see full output * refactor: add comments to explain true statement * feat: move some tests to experimental mode * refactor: refactor pipeline into more fine-grained difference * feat: add version differentiation for some tests * feat: activate more configs * refactor: change implementation to use fewer requirement files * refactor: remove constraints used for testing * fix: revert some requirement file names * refactor: remove unnecessary ninja install * fix: fix version parsing * refactor: remove dependency on torchvision in main requirements file * refactor: remove index url * style: remove unnecessary line switch * fix: re-add index url
2023-05-30 21:16:24 +02:00 · 2023-05-30 21:16:24 +02:00 · db3f2e3fde
parent 959f4f48d5
commit db3f2e3fde
7 changed files with 69 additions and 19 deletions
--- a/.github/actions/setup-build/action.yml
+++ b/.github/actions/setup-build/action.yml
@ -13,6 +13,12 @@ inputs:
      but the content is irrelevant.
    required: false
    default: ''
+  torch-version:
+    description: |
+      Additional string to determine whether to test against a stable
+      torch release or against the nightly build
+    required: false
+    default: 'nightly'

 runs:
  using: "composite"
--- a/.github/workflows/buildAndTest.yml
+++ b/.github/workflows/buildAndTest.yml
@ -28,6 +28,7 @@ jobs:
        os-arch: [ubuntu-x86_64, macos-arm64, windows-x86_64]
        llvm-build: [in-tree, out-of-tree]
        torch-binary: [ON, OFF]
+        torch-version: [nightly, stable]
        exclude:
          # Exclude llvm in-tree and pytorch source
          - llvm-build: in-tree
@ -38,8 +39,12 @@ jobs:
          # Exclude macos-arm64 and llvm out-of-tree altogether
          - os-arch: macos-arm64
            llvm-build: out-of-tree
+          - os-arch: macos-arm64
+            torch-version: stable
          - os-arch: windows-x86_64
            llvm-build: out-of-tree
+          - os-arch: windows-x86_64
+            torch-version: stable
        include:
          # Specify OS versions
          - os-arch: ubuntu-x86_64
@ -74,7 +79,8 @@ jobs:
    - name: Setup ccache
      uses: ./.github/actions/setup-build
      with:
-        cache-suffix: 'build-${{ matrix.llvm-build }}'
+        cache-suffix: 'build-${{ matrix.llvm-build }}-${{ matrix.torch-version }}'
+        torch-version: ${{ matrix.torch-version }}

    - name: Set up Visual Studio shell
      if: ${{ matrix.os-arch == 'windows-x86_64' }}
@ -98,6 +104,7 @@ jobs:
        TM_PACKAGES="${{ matrix.llvm-build }}" \
        TM_USE_PYTORCH_BINARY="${{ matrix.torch-binary }}" \
        TM_PYTORCH_INSTALL_WITHOUT_REBUILD="${{ steps.cache-pytorch.outputs.cache-hit }}" \
+        TM_TORCH_VERSION="${{ matrix.torch-version }}" \
        ./build_tools/python_deploy/build_linux_packages.sh

    - name: Configure os-arch='macos-arm64' llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
--- a/build_tools/python_deploy/build_linux_packages.sh
+++ b/build_tools/python_deploy/build_linux_packages.sh
@ -192,7 +192,7 @@ function run_in_docker() {
            popd
          fi
          if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
-            test_in_tree;
+            test_in_tree "$TM_TORCH_VERSION";
          fi
          ;;
        *)
@ -268,17 +268,41 @@ function _check_file_not_changed_by() {
 }

 function test_in_tree() {
+  local torch_version="$1"
+  
  echo ":::: Test in-tree"
  cmake --build /main_checkout/torch-mlir/build --target check-torch-mlir-all

  cd /main_checkout/torch-mlir/
  export PYTHONPATH="/main_checkout/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir"

-  echo ":::: Check that update_abstract_interp_lib.sh has been run"
-  _check_file_not_changed_by ./build_tools/update_abstract_interp_lib.sh lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
+  case $torch_version in
+    nightly)
+      echo ":::: Test with nightly torch"

-  echo ":::: Check that update_torch_ods.sh has been run"
-  _check_file_not_changed_by ./build_tools/update_torch_ods.sh include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
+      echo ":::: Check that update_abstract_interp_lib.sh has been run"
+      _check_file_not_changed_by ./build_tools/update_abstract_interp_lib.sh lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
+
+      echo ":::: Check that update_torch_ods.sh has been run"
+      _check_file_not_changed_by ./build_tools/update_torch_ods.sh include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
+
+      echo ":::: Run Lazy Tensor Core e2e integration tests"
+      python -m e2e_testing.main --config=lazy_tensor_core -v
+      ;;
+    stable)
+      echo ":::: Test with stable torch"
+
+      echo ":::: Run Lazy Tensor Core e2e integration tests in experimental mode"
+      python -m e2e_testing.main --config=lazy_tensor_core -v --ignore_failures
+      ;;
+    *)
+      echo "Unrecognized torch version '$torch_version'"
+      exit 1
+      ;;
+    esac
+  
+  echo ":::: Run TorchDynamo e2e integration tests"
+  python -m e2e_testing.main --config=torchdynamo -v

  echo ":::: Run Linalg e2e integration tests"
  python -m e2e_testing.main --config=linalg -v
@ -288,12 +312,6 @@ function test_in_tree() {

  echo ":::: Run TOSA e2e integration tests"
  python -m e2e_testing.main --config=tosa -v
-
-  echo ":::: Run Lazy Tensor Core e2e integration tests"
-  python -m e2e_testing.main --config=lazy_tensor_core -v
-
-  echo ":::: Run TorchDynamo e2e integration tests"
-  python -m e2e_testing.main --config=torchdynamo -v
 }

 function setup_venv() {
@ -306,16 +324,16 @@ function setup_venv() {

  echo ":::: pip installing dependencies"
  python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/externals/llvm-project/mlir/python/requirements.txt
-  python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/requirements.txt

  case $torch_version in
    nightly)
      echo ":::: Using nightly dependencies"
      python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/requirements.txt
+      python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/torchvision-requirements.txt
      ;;
    stable)
      echo ":::: Using stable dependencies"
-      python3 -m pip install --no-cache-dir torch torchvision
+      python3 -m pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu
      python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/build-requirements.txt
      python3 -m pip install --no-cache-dir -r /main_checkout/torch-mlir/test-requirements.txt
      ;;
--- a/e2e_testing/main.py
+++ b/e2e_testing/main.py
@ -72,6 +72,10 @@ which make it easier to attach a debugger or get a stack trace.""")
    parser.add_argument("--crashing_tests_to_not_attempt_to_run_and_a_bug_is_filed",
                        metavar="TEST", type=str, nargs="+",
                        help="A set of tests to not attempt to run, since they crash and cannot be XFAILed.")
+    parser.add_argument("--ignore_failures", 
+                        default=False,
+                        action="store_true",
+                        help="return exit code 0 even if the test fails to unblock pipeline")
    return parser

 def main():
@ -137,6 +141,8 @@ def main():

    # Report the test results.
    failed = report_results(results, xfail_set, args.verbose)
+    if args.ignore_failures:
+        sys.exit(0)
    sys.exit(1 if failed else 0)

 def _suppress_warnings():
--- a/python/torch_mlir/dynamo.py
+++ b/python/torch_mlir/dynamo.py
@ -4,6 +4,7 @@
 # Also available under a BSD-style license. See LICENSE.

 from typing import List
+from packaging import version

 import torch
 from torch._functorch.compile_utils import strip_overloads
@ -35,7 +36,7 @@ def _get_decomposition_table():
    the new decomposition infra and PrimTorch.
    """
    aten = torch.ops.aten
-    return get_decompositions([
+    decomp_list = [
        aten._adaptive_avg_pool2d,
        aten.std.correction,
        aten.dot,
@ -62,9 +63,12 @@ def _get_decomposition_table():
        aten.native_group_norm_backward,
        aten.sigmoid_backward,
        aten._native_batch_norm_legit,
-        aten._native_batch_norm_legit_no_training,
        aten.squeeze,
-    ])
+    ]
+    # TODO: enable test once 2.1.0 is stable
+    if version.parse(torch.__version__) > version.parse("2.0.1+cpu"):
+        decomp_list += [aten._native_batch_norm_legit_no_training]
+    return get_decompositions(decomp_list)


 def _adjust_calling_convention(gm: torch.fx.GraphModule) -> bool:
--- a/python/torch_mlir_e2e_test/test_suite/init.py
+++ b/python/torch_mlir_e2e_test/test_suite/init.py
@ -13,6 +13,17 @@ COMMON_TORCH_MLIR_LOWERING_XFAILS = {
    "ReduceMaxAlongDimUnsignedInt_basic",
 }

+# TODO: Delete once torch 2.1.0 is released
+# check for torch version and disable tests
+TORCH_2_1_REQUIRED = {
+    "ScaledDotProductAttentionDifferentModule_basic",
+    "ScaledDotProductAttentionSameModule_basic"
+}
+import torch
+from packaging import version
+if not version.parse(torch.__version__) > version.parse("2.0.1+cpu"):
+    COMMON_TORCH_MLIR_LOWERING_XFAILS.update(TORCH_2_1_REQUIRED)
+
 def register_all_tests():
    """Registers all the built-in E2E tests that Torch-MLIR provides."""
    # Side-effecting import statements.
--- a/test-requirements.txt
+++ b/test-requirements.txt
@ -1,5 +1,3 @@
-r torchvision-requirements.txt
-
 pillow
 dill
 multiprocess