mirror of https://github.com/llvm/torch-mlir
Fix symint ops and blacklist `lift_fresh_copy` (#1373)
* Add symint to native functions yaml
* Re-enable LTC
* Fix new_empty_strided and narrow_copy
parent 4f3cd236dd
commit 8967463980
@@ -264,6 +264,9 @@ class GenTorchMlirLTC:
         # Additional ops to support that are not supported by Torch-MLIR explicitly
         supported |= set(config.get("additional_ops", []))
 
+        # List of ops that will take in symints for its size
+        symint = set(config.get("symint", []))
+
         self.ops = sorted(ops)
 
         with self.source_yaml.open("w") as f:
@@ -272,6 +275,7 @@ class GenTorchMlirLTC:
                 "cpp_namespace": "torch::lazy",
                 "full_codegen": self.ops,
                 "supported": sorted(supported),
+                "symint": sorted(symint),
                 "non_native": non_native,
             }
             yaml.dump(source_yaml, f, default_flow_style=False)

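For reference, a minimal sketch of the backend YAML these two hunks now emit. Only the keys mirror the script; the op names below are placeholders, not the real lists (those are computed from Torch-MLIR's op registry):

    # Sketch only: mirrors the keys written by GenTorchMlirLTC above.
    # The op names are placeholders, not the actual generated lists.
    import yaml

    source_yaml = {
        "cpp_namespace": "torch::lazy",
        "full_codegen": ["aten::abs", "aten::add.Tensor"],  # fully auto-generated ops
        "supported": ["block_diag", "narrow_copy"],         # hand-written kernels
        "symint": ["empty.memory_format", "view_copy"],     # ops taking SymInt sizes
        "non_native": [],
    }
    print(yaml.dump(source_yaml, default_flow_style=False))
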
@@ -16,6 +16,7 @@ blacklist:
 - copy_
 
 # Disabled for consistency with TS backend
+- lift_fresh_copy
 - new_empty
 - rsub
 - slice.Tensor # Disabled in favour of slice_copy.Tensor
@@ -60,6 +61,7 @@ supported:
 # but their implementations call view operators (which we need to functionalize away).
 - block_diag
 - new_empty_strided
+- narrow_copy
 - pixel_shuffle
 - pixel_unshuffle
 - select_backward
@@ -69,6 +71,16 @@ supported:
 - linalg_pinv.atol_rtol_tensor
 - logsumexp.out
 
+# List of ops that will take in symints for the size instead of ints
+symint:
+- empty.memory_format
+- new_empty_strided
+- expand
+- expand_copy
+- narrow_copy
+- view
+- view_copy
+
 
 additional_ops:
 # Additional ops to support that are not supported by Torch-MLIR explicitly

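Listing an op under `symint:` binds it to a SymInt-taking C++ entry point with a `_symint` suffix, as the mlir_native_functions.cpp hunks below show. A small illustration of that naming convention (the helper here is purely for demonstration, not part of the build):

    # Demonstration of the `_symint` naming convention; illustrative only.
    SYMINT_OPS = ["empty.memory_format", "new_empty_strided", "narrow_copy", "view_copy"]

    def lazy_entry_point(op: str) -> str:
        # Drop the overload name ("empty.memory_format" -> "empty"), add the suffix.
        base = op.split(".")[0]
        return f"LazyNativeFunctions::{base}_symint"

    for op in SYMINT_OPS:
        print(f"{op:24s} -> {lazy_entry_point(op)}")
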
@@ -177,7 +177,7 @@ function build_in_tree() {
     -DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR="/main_checkout/torch-mlir/externals/llvm-external-projects/torch-mlir-dialects" \
     -DLLVM_TARGETS_TO_BUILD=host \
     -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-    -DTORCH_MLIR_ENABLE_LTC=OFF \
+    -DTORCH_MLIR_ENABLE_LTC=ON \
     -DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
     -DPython3_EXECUTABLE="$(which python3)" \
     /main_checkout/torch-mlir/externals/llvm-project/llvm
@@ -289,7 +289,7 @@ function build_out_of_tree() {
     -DLLVM_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/llvm/" \
     -DMLIR_DIR="/main_checkout/torch-mlir/llvm-build/lib/cmake/mlir/" \
     -DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
-    -DTORCH_MLIR_ENABLE_LTC=OFF \
+    -DTORCH_MLIR_ENABLE_LTC=ON \
     -DTORCH_MLIR_USE_INSTALLED_PYTORCH="$torch_from_src" \
     -DPython3_EXECUTABLE="$(which python3)" \
     /main_checkout/torch-mlir

@@ -301,9 +301,11 @@ at::Tensor LazyNativeFunctions::_to_copy(
   }
 };
 
-at::Tensor LazyNativeFunctions::empty(
-    at::SymIntArrayRef sym_size, c10::optional<at::ScalarType> dtype,
-    c10::optional<at::Layout> layout, c10::optional<at::Device> device,
+at::Tensor LazyNativeFunctions::empty_symint(
+    at::SymIntArrayRef sym_size,
+    c10::optional<at::ScalarType> dtype,
+    c10::optional<at::Layout> layout,
+    c10::optional<at::Device> device,
     c10::optional<bool> pin_memory,
     c10::optional<at::MemoryFormat> memory_format) {
   // TODO: support this directly
@@ -333,8 +335,8 @@ at::Tensor LazyNativeFunctions::empty_strided(
     c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout,
     c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
   TORCH_LAZY_FN_COUNTER("lazy::");
-  at::Tensor t = empty(
-      c10::SymIntArrayRef::fromIntArrayRef(size),
+  at::Tensor t = empty_symint(
+      c10::fromIntArrayRef(size),
       dtype, layout, device, pin_memory, c10::nullopt);
   return t.as_strided(size, stride, /*storage_offset=*/0);
 }
@@ -354,7 +356,7 @@ LazyNativeFunctions::fill_(at::Tensor& self, const at::Scalar& value) {
 at::Tensor LazyNativeFunctions::_unsafe_view(
     const at::Tensor& self, at::IntArrayRef size) {
   TORCH_LAZY_FN_COUNTER("lazy::");
-  return LazyNativeFunctions::view_copy(self, c10::SymIntArrayRef::fromIntArrayRef(size));
+  return LazyNativeFunctions::view_copy_symint(self, c10::fromIntArrayRef(size));
 }
 
 // This is needed by the torch.tensor constructor.
@@ -380,15 +382,27 @@ at::Tensor LazyNativeFunctions::block_diag(at::TensorList tensors) {
   return at::functionalization::functionalize_aten_op<ATEN_OP(
       block_diag)>::call(tensors);
 }
-at::Tensor LazyNativeFunctions::new_empty_strided(
-    const at::Tensor& self, at::IntArrayRef size, at::IntArrayRef stride,
-    c10::optional<at::ScalarType> dtype, c10::optional<at::Layout> layout,
-    c10::optional<at::Device> device, c10::optional<bool> pin_memory) {
+at::Tensor LazyNativeFunctions::new_empty_strided_symint(
+    const at::Tensor& self,
+    c10::SymIntArrayRef size,
+    c10::SymIntArrayRef stride,
+    c10::optional<at::ScalarType> dtype,
+    c10::optional<at::Layout> layout,
+    c10::optional<at::Device> device,
+    c10::optional<bool> pin_memory) {
   return at::functionalization::
       functionalize_aten_op<ATEN_OP(new_empty_strided)>::call(
           self, size, stride, dtype, layout, device, pin_memory);
 }
 
+at::Tensor LazyNativeFunctions::narrow_copy_symint(
+    const at::Tensor& self,
+    int64_t dim,
+    c10::SymInt start,
+    c10::SymInt length) {
+  return at::functionalization::functionalize_aten_op<ATEN_OP(
+      narrow_copy)>::call(self, dim, start, length);
+}
 at::Tensor LazyNativeFunctions::pixel_shuffle(
     const at::Tensor& self, int64_t upscale_factor) {
   return at::functionalization::functionalize_aten_op<ATEN_OP(

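The ops in the `supported:` list lean on at::functionalization::functionalize_aten_op, which re-runs an op under functionalization so that any view calls inside its decomposition are traced as *_copy variants the lazy backend can lower. A rough Python analogue using functorch (functionalize later moved to torch.func in newer PyTorch releases):

    # Rough analogue of functionalize_aten_op: run a view-containing function
    # under functionalization so views are traced as copy ops instead.
    import torch
    from functorch import functionalize  # torch.func.functionalize in newer releases

    def f(x):
        return x.narrow(1, 0, 2)  # narrow is a view op

    g = functionalize(f)
    print(g(torch.randn(3, 4)).shape)  # same result, traced without view ops
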
setup.py
@@ -45,6 +45,9 @@ import torch
 
 PACKAGE_VERSION = os.environ.get("TORCH_MLIR_PYTHON_PACKAGE_VERSION") or "0.0.1"
 
+# If true, enable LTC build by default
+TORCH_MLIR_ENABLE_LTC_DEFAULT = True
+
 # Build phase discovery is unreliable. Just tell it what phases to run.
 class CustomBuild(_build):
 
@@ -68,6 +71,9 @@ class CMakeBuild(build_py):
         src_dir = os.path.abspath(os.path.dirname(__file__))
         llvm_dir = os.path.join(
             src_dir, "externals", "llvm-project", "llvm")
+
+        enable_ltc = int(os.environ.get('TORCH_MLIR_ENABLE_LTC', TORCH_MLIR_ENABLE_LTC_DEFAULT))
+
         cmake_args = [
             f"-DCMAKE_BUILD_TYPE=Release",
             f"-DPython3_EXECUTABLE={sys.executable}",
@@ -82,7 +88,7 @@ class CMakeBuild(build_py):
             f"-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON",
             f"-DCMAKE_C_VISIBILITY_PRESET=hidden",
             f"-DCMAKE_CXX_VISIBILITY_PRESET=hidden",
-            f"-DTORCH_MLIR_ENABLE_LTC={'OFF' if int(os.environ.get('TORCH_MLIR_ENABLE_LTC', 1)) else 'OFF'}",
+            f"-DTORCH_MLIR_ENABLE_LTC={'ON' if enable_ltc else 'OFF'}",
         ]
 
         os.makedirs(cmake_build_dir, exist_ok=True)
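This is the same TORCH_MLIR_ENABLE_LTC knob the CI scripts above pass to CMake, now resolved in setup.py from the environment and defaulting to on. A small sketch of the resolution logic and its edge cases, assuming the default constant introduced above:

    # How the flag resolves; mirrors the enable_ltc line in setup.py.
    import os

    TORCH_MLIR_ENABLE_LTC_DEFAULT = True

    def resolve_ltc_flag() -> str:
        # Unset -> int(True) == 1 -> "ON"; TORCH_MLIR_ENABLE_LTC=0 -> "OFF".
        # Note: a non-numeric value such as "false" would raise ValueError here.
        enable_ltc = int(os.environ.get("TORCH_MLIR_ENABLE_LTC", TORCH_MLIR_ENABLE_LTC_DEFAULT))
        return "ON" if enable_ltc else "OFF"

    print(f"-DTORCH_MLIR_ENABLE_LTC={resolve_ltc_flag()}")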