Merge matrix runs to fail fast globally (#1216)

My earlier [PR](https://github.com/llvm/torch-mlir/pull/1213) had (among other things) decoupled the ubuntu and macos builds into separate matrix runs. This is not working well: the limited number of macOS GHA VMs is causing long queue times and a backlog. There are two reasons for this backlog:

1. macos-arm64 builds with pytorch source are getting erratically cancelled due to resource / network constraints. This is addressed in https://github.com/llvm/torch-mlir/pull/1215.

   > "macos-arm64 (in-tree, OFF) The hosted runner: GitHub Actions 3 lost communication with the server. Anything in your workflow that terminates the runner process, starves it for CPU/Memory, or blocks its network access can cause this error."

2. macos runs don't fail fast when ubuntu runs fail, because the two are in separate matrix setups. This PR couples them again.
2022-08-12 11:30:09 -07:00 · 2022-08-12 11:30:09 -07:00 · aed0ec3a2c
parent b8bd0a46cc
commit aed0ec3a2c
1 changed files with 55 additions and 70 deletions
--- a/.github/workflows/buildAndTest.yml
+++ b/.github/workflows/buildAndTest.yml
@ -14,12 +14,11 @@ on:
 jobs:
-  ubuntu-build:
+  build-test:
    name: ubuntu-x86_64
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: true
      matrix:
        os-arch: [ubuntu-x86_64, macos-arm64]
        llvm-build: [in-tree, out-of-tree]
        torch-binary: [ON, OFF]
        exclude:
@ -29,6 +28,16 @@ jobs:
          # Exclude llvm out-of-tree and pytorch binary
          - llvm-build: out-of-tree
            torch-binary: ON
          # Exclude macos-arm64 and llvm out-of-tree altogether
          - os-arch: macos-arm64
            llvm-build: out-of-tree
        include:
          # Specify OS versions
          - os-arch: ubuntu-x86_64
            os: ubuntu-22.04
          - os-arch: macos-arm64
            os: macos-12
    runs-on: ${{ matrix.os }}
    steps:
    - name: Checkout torch-mlir
@ -39,11 +48,11 @@ jobs:
    - name: Setup ccache
      uses: ./.github/actions/setup-build
      with:
-        cache-suffix: ubuntu-x86_64-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}
+        cache-suffix: ${{ matrix.os-arch }}-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}
-    - name: Configure llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
+    - name: Configure os-arch='ubuntu-x86_64' llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
      # Fastest build, most used dev flow
-      if: matrix.llvm-build == 'in-tree'
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        cmake -GNinja -Bbuild \
          -DCMAKE_BUILD_TYPE=Release \
@ -64,10 +73,9 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE/externals/llvm-project/llvm
-    - name: Configure llvm-build='out-of-tree' torch-binary='${{ matrix.torch-binary }}'
+    - name: Configure os-arch='ubuntu-x86_64' llvm-build='out-of-tree' torch-binary='${{ matrix.torch-binary }}'
      # Most elaborate build, but cached
-      # A cache invalidation occurs when the committed LLVM version is changed.
+      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'out-of-tree' }}
      if: matrix.llvm-build == 'out-of-tree'
      run: |
        cmake -GNinja -Bllvm-build \
          -DCMAKE_BUILD_TYPE=Release \
@ -100,67 +108,9 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE
-    - name: Build torch-mlir
+    - name: Configure os-arch='macos-arm64' llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
-      run: |
+      # cross compile, can't test arm64
-        cmake --build build
+      if: ${{ matrix.os-arch == 'macos-arm64' && matrix.llvm-build == 'in-tree' }}
    - name: Run torch-mlir unit tests
      run: |
        cmake --build build --target check-torch-mlir-all
    - name: Run refbackend e2e integration tests
      if: matrix.llvm-build == 'in-tree'
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=refbackend -v
    - name: Run eager_mode e2e integration tests
      if: matrix.llvm-build == 'in-tree'
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=eager_mode -v
    - name: Run tosa e2e integration tests
      if: matrix.llvm-build == 'in-tree'
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=tosa -v
    - name: Run lazy_tensor_core e2e integration tests
      if: matrix.llvm-build == 'in-tree'
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v
  macos-build:
    name: macos-arm64
    runs-on: macos-12
    strategy:
      fail-fast: true
      matrix:
        llvm-build: [in-tree, out-of-tree]
        torch-binary: [ON, OFF]
        exclude:
          # Exclude llvm in-tree and pytorch source
          - llvm-build: in-tree
            torch-binary: OFF
          # Exclude llvm out-of-tree altogether
          - llvm-build: out-of-tree
    steps:
    - name: Checkout torch-mlir
      uses: actions/checkout@v2
      with:
        submodules: 'true'
    - name: Setup ccache
      uses: ./.github/actions/setup-build
      with:
        cache-suffix: macos-arm64-${{ matrix.llvm-build }}-${{ matrix.torch-binary }}
    - name: Configure llvm-build='in-tree' torch-binary='${{ matrix.torch-binary }}'
      # libzstd on GH Runners are only x86_64 to remove them.
      run: |
        cmake -GNinja -Bbuild_arm64 \
          -DCMAKE_BUILD_TYPE=Release \
@ -186,6 +136,41 @@ jobs:
          -DPython3_EXECUTABLE="$(which python)" \
          $GITHUB_WORKSPACE/externals/llvm-project/llvm
    - name: Build torch-mlir
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' }}
      run: |
        cmake --build build
    - name: Build torch-mlir (cross-compile)
      if: ${{ matrix.os-arch == 'macos-arm64' }}
      run: |
        cmake --build build_arm64
    - name: Run torch-mlir unit tests
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' }}
      run: |
        cmake --build build --target check-torch-mlir-all
    - name: Run refbackend e2e integration tests
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=refbackend -v
    - name: Run eager_mode e2e integration tests
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=eager_mode -v
    - name: Run tosa e2e integration tests
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=tosa -v
    - name: Run lazy_tensor_core e2e integration tests
      if: ${{ matrix.os-arch == 'ubuntu-x86_64' && matrix.llvm-build == 'in-tree' }}
      run: |
        export PYTHONPATH="$GITHUB_WORKSPACE/build/tools/torch-mlir/python_packages/torch_mlir"
        python -m e2e_testing.torchscript.main --config=lazy_tensor_core -v