Merge matrix runs to fail fast globally (#1216)

My earlier [PR](https://github.com/llvm/torch-mlir/pull/1213) had (among other things) decoupled the ubuntu and macos builds into separate matrix runs. This is not working well: the limited number of macOS GHA VMs causes long queue times and a backlog. There are two causes of this backlog:

1. macOS arm64 builds with PyTorch built from source are getting erratically cancelled due to resource / network constraints. This is addressed in https://github.com/llvm/torch-mlir/pull/1215

   > "macos-arm64 (in-tree, OFF) The hosted runner: GitHub Actions 3 lost communication with the server. Anything in your workflow that terminates the runner process, starves it for CPU/Memory, or blocks its network access can cause this error."

2. macOS runs don't fail fast when ubuntu runs fail, because the two are in separate matrix setups.

This PR couples them again.
2022-08-12 11:30:09 -07:00 · 2022-08-12 11:30:09 -07:00 · aed0ec3a2c
parent b8bd0a46cc
commit aed0ec3a2c
1 changed files with 55 additions and 70 deletions
--- a/.github/workflows/buildAndTest.yml
+++ b/.github/workflows/buildAndTest.yml
@ -14,12 +14,11 @@ on:


 jobs:
-  ubuntu-build:
-    name: ubuntu-x86_64
-    runs-on: ubuntu-22.04
+  build-test:
    strategy:
      fail-fast: true
      matrix:
+        os-arch: [ubuntu-x86_64, macos-arm64]
        llvm-build: [in-tree, out-of-tree]
        torch-binary: [ON, OFF]
        exclude:
@ -29,6 +28,16 @@ jobs:
          # Exclude llvm out-of-tree and pytorch binary
          - llvm-build: out-of-tree
            torch-binary: ON
+          # Exclude macos-arm64 and llvm out-of-tree altogether
+          - os-arch: macos-arm64
+            llvm-build: out-of-tree
+        include:
+          # Specify OS versions
+          - os-arch: ubuntu-x86_64
+            os: ubuntu-22.04
+          - os-arch: macos-arm64
+            os: macos-12
+    runs-on: ${{ matrix.os }}

    steps:
    - name: Checkout torch-mlir
@ -39,11 +48,11 @@ jobs:
    - name: Setup ccache
      uses: ./.github/actions/setup-build
      with:
-        cache-suffix: ubuntu-x86_64-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}
+        cache-suffix: ${{ matrix.os-arch }}-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}

-    - name: Configure llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
+    - name: Configure os-arch='ubuntu-x86_64' llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
      # Fastest build, most used dev flow
-      if: matrix.llvm-build == 'in-tree'
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        cmake -GNinja -Bbuild \
          -DCMAKE_BUILD_TYPE=Release \
@ -64,10 +73,9 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE/externals/llvm-project/llvm

-    - name: Configure llvm-build='out-of-tree' torch-binary='${{ matrix.torch-binary }}'
+    - name: Configure os-arch='ubuntu-x86_64' llvm-build='out-of-tree' torch-binary='${{ matrix.torch-binary }}'
      # Most elaborate build, but cached
-      # A cache invalidation occurs when the committed LLVM version is changed.
-      if: matrix.llvm-build == 'out-of-tree'
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'out-of-tree' }}
      run: |
        cmake -GNinja -Bllvm-build \
          -DCMAKE_BUILD_TYPE=Release \
@ -100,67 +108,9 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE

-    - name: Build torch-mlir
-      run: |
-        cmake --build build
-    
-    - name: Run torch-mlir unit tests
-      run: |
-        cmake --build build --target check-torch-mlir-all
-
-    - name: Run refbackend e2e integration tests
-      if: matrix.llvm-build == 'in-tree'
-      run: |
-        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
-        python -m e2e_testing.torchscript.main --config=refbackend -v
-
-    - name: Run eager_mode e2e integration tests
-      if: matrix.llvm-build == 'in-tree'
-      run: |
-        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
-        python -m e2e_testing.torchscript.main --config=eager_mode -v
-
-    - name: Run tosa e2e integration tests
-      if: matrix.llvm-build == 'in-tree'
-      run: |
-        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
-        python -m e2e_testing.torchscript.main --config=tosa -v
-
-    - name: Run lazy_tensor_core e2e integration tests
-      if: matrix.llvm-build == 'in-tree'
-      run: |
-        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
-        python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v
-
-
-  macos-build:
-    name: macos-arm64
-    runs-on: macos-12
-    strategy:
-      fail-fast: true
-      matrix:
-        llvm-build: [in-tree, out-of-tree]
-        torch-binary: [ON, OFF]
-        exclude:
-          # Exclude llvm in-tree and pytorch source
-          - llvm-build: in-tree
-            torch-binary: OFF
-          # Exclude llvm out-of-tree altogether
-          - llvm-build: out-of-tree
-
-    steps:
-    - name: Checkout torch-mlir
-      uses: actions/checkout@v2
-      with:
-        submodules: 'true'
-
-    - name: Setup ccache
-      uses: ./.github/actions/setup-build
-      with:
-        cache-suffix: macos-arm64-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}
-
-    - name: Configure llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
-      # libzstd on GH Runners are only x86_64 to remove them.
+    - name: Configure os-arch='macos-arm64' llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
+      # cross compile, can't test arm64
+      if: ${{ matrix.os-arch == 'macos-arm64' && matrix.llvm-build == 'in-tree' }}
      run: |
        cmake -GNinja -Bbuild_arm64 \
          -DCMAKE_BUILD_TYPE=Release \
@ -186,6 +136,41 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE/externals/llvm-project/llvm

+    - name: Build torch-mlir
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' }}
+      run: |
+        cmake --build build
+
    - name: Build torch-mlir (cross-compile)
+      if: ${{ matrix.os-arch == 'macos-arm64' }}
      run: |
        cmake --build build_arm64
+
+    - name: Run torch-mlir unit tests
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' }}
+      run: |
+        cmake --build build --target check-torch-mlir-all
+
+    - name: Run refbackend e2e integration tests
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
+      run: |
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=refbackend -v
+
+    - name: Run eager_mode e2e integration tests
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
+      run: |
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=eager_mode -v
+
+    - name: Run tosa e2e integration tests
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
+      run: |
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=tosa -v
+
+    - name: Run lazy_tensor_core e2e integration tests
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
+      run: |
+        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
+        python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v