diff --git a/.gitignore b/.gitignore index 3691deec1..543d617fe 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,3 @@ bazel-* # Autogenerated files /python/torch_mlir/csrc/base_lazy_backend/generated - -# Example backend -examples/ltc_backend/ltc_backend/_EXAMPLE_MLIR_BACKEND.cpython-37m-x86_64-linux-gnu.so diff --git a/CMakeLists.txt b/CMakeLists.txt index b1df0f81b..10432d1e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,4 +192,3 @@ else() endif() add_subdirectory(test) -add_subdirectory(examples) diff --git a/build_tools/autogen_ltc_backend.py b/build_tools/autogen_ltc_backend.py index 50176a592..658a58a26 100644 --- a/build_tools/autogen_ltc_backend.py +++ b/build_tools/autogen_ltc_backend.py @@ -377,7 +377,7 @@ class GenTorchMlirLTC: // for ops that dont have a corresponding structured kernel or shape definition #include "shape_inference.h" - #include "../../utils/exception.h" + #include "../utils/exception.h" namespace torch {{ namespace lazy {{ {} diff --git a/docs/ltc_backend.md b/docs/ltc_backend.md index 1e60fffe4..16e5863e5 100644 --- a/docs/ltc_backend.md +++ b/docs/ltc_backend.md @@ -60,12 +60,15 @@ Generated files are created in this directory, which is ignored by version contr - `shape_inference.cpp` - Implementation of select shape inference functions (most functions are [implemented upstream](https://github.com/pytorch/pytorch/blob/master/torch/csrc/lazy/core/shape_inference.cpp)) +### Reference Backend ([`python/torch_mlir/csrc/reference_lazy_backend`](../python/torch_mlir/csrc/reference_lazy_backend)) + +- `backend_impl.{cpp,h}` + - Reference Torch-MLIR LTC backend implementation, which simply stores the MLIR as a string and executes computation on CPU +- `reference_lazy_backend_pybind.cpp` + - pybind for reference Torch-MLIR LTC backend + ### Examples ([`examples`](../examples)) -- `examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.{cpp,h}` - - Example Torch-MLIR LTC backend implementation, which simply stores the MLIR as a string and executes computation on CPU -- `examples/ltc_backend/ltc_backend/csrc/example_mlir_backend_pybind.cpp` - - pybind for example Torch-MLIR LTC backend - `ltc_backend_bert.py` - Example HuggingFace BERT model traced by LTC to MLIR - `ltc_backend_mnist.py` @@ -77,7 +80,7 @@ Generated files are created in this directory, which is ignored by version contr The journey begins with a tensor in PyTorch on the `lazy` device, which may undergo a number of operations during its lifetime. ```python ->>> ltc_backend._initialize() +>>> lazy_backend._initialize() >>> x = torch.tensor(..., device='lazy') >>> y = torch.tanh(x) ... @@ -116,17 +119,17 @@ Finally, the compiled computation is sent to `TorchMlirBackendImpl::ExecuteCompu ## Implementing a custom backend -An example implementation of a custom backend is available [here](../examples/ltc_backend/ltc_backend). +A reference implementation of a custom backend is available [here](../python/torch_mlir/csrc/reference_lazy_backend/). All the work involved with generating MLIR is handled in the base LTC backend, so vendors only need to worry about implementing `Compile`, `ExecuteComputation`, and some other minor methods to interface with the device. A pybind is needed to invoke C++ code to register the autogen PyTorch kernels and the custom backend itself. -Most of the code in the example implementation should be reusable, excluding some debug related function (e.g. `get_latest_computation`). +Most of the code in the reference implementation should be reusable, excluding some debug related function (e.g. `get_latest_computation`). ## Future Expansion There are a number of areas for future improvement: - Generate source information in `jit::Graph` so it can be embedded in the MLIR -- Currently the example backend implementation executes via the `jit::Graph` instead of the MLIR since we currently lack lowerings for many ops, which would make it difficult to run models such as HF BERT +- Currently the reference backend implementation executes via the `jit::Graph` instead of the MLIR since we currently lack lowerings for many ops, which would make it difficult to run models such as HF BERT - In the future, we should change the implementation to lower the MLIR to linalg and execute on a reference backend - As new models get tested, we will inevitably run into errors related to unimplemented shape inference functions. This problem is simply solved by implementing the missing function, or adding a structured kernel to PyTorch. diff --git a/docs/ltc_examples.md b/docs/ltc_examples.md index 306dabb8a..526d88d78 100644 --- a/docs/ltc_examples.md +++ b/docs/ltc_examples.md @@ -6,10 +6,10 @@ Refer to the main documentation [here](ltc_backend.md). ```python import torch import torch._lazy -import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend +import torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND as lazy_backend # Register the example LTC backend. -ltc_backend._initialize() +lazy_backend._initialize() device = 'lazy' @@ -22,7 +22,7 @@ torch._lazy.mark_step() print('Results:', outputs) # Optionally dump MLIR graph generated from LTC trace. -computation = ltc_backend.get_latest_computation() +computation = lazy_backend.get_latest_computation() if computation: print(computation.debug_string()) ``` diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index d390ea366..000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(ltc_backend) diff --git a/examples/ltc_backend/ltc_backend/csrc/utils/sys_utils.h b/examples/ltc_backend/ltc_backend/csrc/utils/sys_utils.h deleted file mode 100644 index 640d872cf..000000000 --- a/examples/ltc_backend/ltc_backend/csrc/utils/sys_utils.h +++ /dev/null @@ -1,26 +0,0 @@ -//===- sys_utils.h --------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// Also available under a BSD-style license. See LICENSE. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -namespace sys_util { - -template -T GetEnv(const std::string &name, const T &default_value = T(0)) { - const char *env = std::getenv(name.c_str()); - if (!env) { - return default_value; - } - return T(std::atoi(env)); -} - -} // namespace sys_util diff --git a/examples/ltc_backend_bert.py b/examples/ltc_backend_bert.py index 5e43c0d0d..2d27fd0ab 100644 --- a/examples/ltc_backend_bert.py +++ b/examples/ltc_backend_bert.py @@ -113,8 +113,8 @@ def main(device='lazy', full_size=False): losses = train(model, num_epochs, num_training_steps, train_dataloader, device) # Get debug information from LTC - if 'ltc_backend' in sys.modules: - computation = ltc_backend.get_latest_computation() + if 'torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND' in sys.modules: + computation = lazy_backend.get_latest_computation() if computation: print(computation.debug_string()) @@ -148,9 +148,9 @@ if __name__ == "__main__": torch._lazy.ts_backend.init() elif args.device == "MLIR_EXAMPLE": - import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend + import torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND as lazy_backend - ltc_backend._initialize() + lazy_backend._initialize() device = "lazy" print("Initialized backend") diff --git a/examples/ltc_backend_mnist.py b/examples/ltc_backend_mnist.py index b7659f4f9..3e37fc7fd 100644 --- a/examples/ltc_backend_mnist.py +++ b/examples/ltc_backend_mnist.py @@ -65,8 +65,8 @@ def main(device='lazy'): torch._lazy.mark_step() # Get debug information from LTC - if 'ltc_backend' in sys.modules: - computation = ltc_backend.get_latest_computation() + if 'torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND' in sys.modules: + computation = lazy_backend.get_latest_computation() if computation: print(computation.debug_string()) @@ -93,9 +93,9 @@ if __name__ == "__main__": torch._lazy.ts_backend.init() elif args.device == "MLIR_EXAMPLE": - import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend + import torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND as lazy_backend - ltc_backend._initialize() + lazy_backend._initialize() device = "lazy" print("Initialized backend") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 2552e7cf6..86e222beb 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -60,7 +60,8 @@ declare_mlir_python_extension(TorchMLIRPythonExtensions.Main # Lazy Tensor Core ################################################################################ -add_subdirectory(torch_mlir/csrc) +add_subdirectory(torch_mlir/csrc/base_lazy_backend) +add_subdirectory(torch_mlir/csrc/reference_lazy_backend) ################################################################################ # Optionally handle JIT IR importer. @@ -155,6 +156,6 @@ endif() # Add Torch-MLIR LTC backend as dependency add_dependencies(TorchMLIRPythonModules torch_mlir_ltc_backend) +add_dependencies(TorchMLIRPythonModules reference_lazy_backend) add_subdirectory(test) - diff --git a/python/torch_mlir/csrc/CMakeLists.txt b/python/torch_mlir/csrc/base_lazy_backend/CMakeLists.txt similarity index 82% rename from python/torch_mlir/csrc/CMakeLists.txt rename to python/torch_mlir/csrc/base_lazy_backend/CMakeLists.txt index abf5e7634..33dfa2717 100644 --- a/python/torch_mlir/csrc/CMakeLists.txt +++ b/python/torch_mlir/csrc/base_lazy_backend/CMakeLists.txt @@ -20,15 +20,15 @@ include_directories(BEFORE link_directories("${TORCH_INSTALL_PREFIX}/lib") set(LTC_GENERATED - base_lazy_backend/generated/LazyNativeFunctions.cpp - base_lazy_backend/generated/RegisterLazy.cpp - base_lazy_backend/generated/shape_inference.cpp + generated/LazyNativeFunctions.cpp + generated/RegisterLazy.cpp + generated/shape_inference.cpp ) set(LTC_BACKEND_DEPENDS - base_lazy_backend/mlir_lowering_context.cpp - base_lazy_backend/mlir_native_functions.cpp - base_lazy_backend/mlir_node_lowering.cpp - base_lazy_backend/shape_inference.cpp + mlir_lowering_context.cpp + mlir_native_functions.cpp + mlir_node_lowering.cpp + shape_inference.cpp ) # Generate Lazy IR Nodes @@ -57,10 +57,10 @@ add_custom_target( add_library(torch_mlir_ltc_backend SHARED ${LTC_GENERATED} ${LTC_BACKEND_DEPENDS} - base_lazy_backend/backend_impl.cpp - base_lazy_backend/mlir_node.cpp - base_lazy_backend/ops/device_data.cpp - base_lazy_backend/ops/generic.cpp + backend_impl.cpp + mlir_node.cpp + ops/device_data.cpp + ops/generic.cpp ) target_compile_features(torch_mlir_ltc_backend PRIVATE cxx_std_17) diff --git a/python/torch_mlir/csrc/base_lazy_backend/backend_impl.cpp b/python/torch_mlir/csrc/base_lazy_backend/backend_impl.cpp index 250c76eaf..7d7f16125 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/backend_impl.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/backend_impl.cpp @@ -15,12 +15,12 @@ #include #include -#include "../utils/debug.h" -#include "../utils/exception.h" #include "backend_impl.h" #include "ir_builder.h" #include "mlir_lowering_context.h" #include "ops/device_data.h" +#include "utils/debug.h" +#include "utils/exception.h" namespace torch { namespace lazy { diff --git a/python/torch_mlir/csrc/base_lazy_backend/ir_builder.h b/python/torch_mlir/csrc/base_lazy_backend/ir_builder.h index 00b7df189..5d1a2f852 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/ir_builder.h +++ b/python/torch_mlir/csrc/base_lazy_backend/ir_builder.h @@ -22,7 +22,7 @@ #include "mlir_node.h" #include "ops/device_data.h" #include "ops/generic.h" -#include "../utils/exception.h" +#include "utils/exception.h" // This file contains the TorchMlir IrBuilder diff --git a/python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.cpp b/python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.cpp index fb21dcd85..e6d099595 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/mlir_lowering_context.cpp @@ -17,13 +17,13 @@ #include #include "../../dialects/torch/importer/jit_ir/csrc/function_importer.h" -#include "../utils/debug.h" -#include "../utils/exception.h" #include "backend_impl.h" #include "mlir-c/Registration.h" #include "mlir_lowering_context.h" #include "mlir_node.h" #include "torch-mlir-c/Registration.h" +#include "utils/debug.h" +#include "utils/exception.h" namespace torch { namespace lazy { diff --git a/python/torch_mlir/csrc/base_lazy_backend/mlir_native_functions.cpp b/python/torch_mlir/csrc/base_lazy_backend/mlir_native_functions.cpp index 33136a215..13522c605 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/mlir_native_functions.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/mlir_native_functions.cpp @@ -10,11 +10,11 @@ // https://github.com/pytorch/pytorch/blob/master/torch/csrc/lazy/ts_backend/ts_native_functions.cpp //===----------------------------------------------------------------------===// -#include -#include #include +#include #include #include +#include #include #include #include @@ -28,12 +28,11 @@ #include #include - -#include "../utils/exception.h" -#include "../utils/sys_utils.h" -#include "generated/shape_inference.h" #include "generated/LazyNativeFunctions.h" +#include "generated/shape_inference.h" #include "ops/to_copy.h" +#include "utils/exception.h" +#include "utils/sys_utils.h" namespace torch { namespace lazy { @@ -174,7 +173,6 @@ at::Tensor LazyNativeFunctions::cat(at::TensorList tensors, int64_t dim) { // return result; } - // clone is special in LT because we make it a no-op. // This should be safe to do, because every operator in the LT is functional. at::Tensor LazyNativeFunctions::clone( @@ -290,12 +288,16 @@ at::Tensor LazyNativeFunctions::_to_copy( TORCH_LAZY_FN_COUNTER("lazy::"); auto lazy_self = torch::lazy::TryGetLtcTensor(self); if (!lazy_self && device && device->type() == c10::kLazy) { - // Case 1: eager->lazy (we create a new lazy tensor) - // See Note [Lazy Tensor Functionalization] - // Invariant: if the functionalization key is in the exclude set, then we're expected - // to return an ordinary tensor, which will be "lifted" into a functional wrapper later. - bool functionalize_output = !c10::impl::tls_local_dispatch_key_set().excluded_.has(c10::DispatchKey::Functionalize); - return torch::lazy::to_lazy_tensor(self, options, *device, /*non_blocking=*/non_blocking, /*functionalize_output=*/functionalize_output); + // Case 1: eager->lazy (we create a new lazy tensor) + // See Note [Lazy Tensor Functionalization] + // Invariant: if the functionalization key is in the exclude set, then we're expected + // to return an ordinary tensor, which will be "lifted" into a functional wrapper later. + bool functionalize_output = + !c10::impl::tls_local_dispatch_key_set().excluded_.has( + c10::DispatchKey::Functionalize); + return torch::lazy::to_lazy_tensor( + self, options, *device, /*non_blocking=*/non_blocking, + /*functionalize_output=*/functionalize_output); } else if (device && device->type() != c10::kLazy) { // Case 2: lazy->eager (forces a graph break since we are materializing a tensor) @@ -368,7 +370,8 @@ at::Tensor LazyNativeFunctions::empty( auto x_result = at::empty(size, options, memory_format); auto tensor = CreateLtcTensor(x_result, GetLtcDevice(device)); // See Note [Lazy Tensor Functionalization] - if (c10::impl::tls_local_dispatch_key_set().excluded_.has(c10::DispatchKey::Functionalize)) { + if (c10::impl::tls_local_dispatch_key_set().excluded_.has( + c10::DispatchKey::Functionalize)) { // Invariant: if the functionalization key is in the exclude set, then we're expected // to return an ordinary tensor, which will be "lifted" into a functional wrapper later. return tensor; @@ -409,7 +412,8 @@ at::Tensor LazyNativeFunctions::_unsafe_view( // LazyTensor always opts into functionalization. // "lifting" a tensor for functionalization means wrapping it in a FunctionalTensorWrapper object. at::Tensor LazyNativeFunctions::lift(const at::Tensor& tensor) { - TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(tensor)); + TORCH_INTERNAL_ASSERT( + !at::functionalization::impl::isFunctionalTensor(tensor)); return at::functionalization::impl::to_functional_tensor(tensor); } @@ -418,43 +422,75 @@ at::Tensor LazyNativeFunctions::lift(const at::Tensor& tensor) { // These are all composite ops that LTC can technically re-use / get for free, // but we need to "functionalize" them to remove the view ops before we can use them. at::Tensor LazyNativeFunctions::block_diag(at::TensorList tensors) { - return at::functionalization::functionalize_aten_op::call(tensors); + return at::functionalization::functionalize_aten_op::call(tensors); } -at::Tensor LazyNativeFunctions::new_empty_strided(const at::Tensor& self, at::IntArrayRef size, at::IntArrayRef stride, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory) { - return at::functionalization::functionalize_aten_op::call(self, size, stride, dtype, layout, device, pin_memory); +at::Tensor LazyNativeFunctions::new_empty_strided( + const at::Tensor& self, at::IntArrayRef size, at::IntArrayRef stride, + c10::optional dtype, c10::optional layout, + c10::optional device, c10::optional pin_memory) { + return at::functionalization:: + functionalize_aten_op::call( + self, size, stride, dtype, layout, device, pin_memory); } -at::Tensor LazyNativeFunctions::narrow_copy(const at::Tensor& self, int64_t dim, int64_t start, int64_t length) { - return at::functionalization::functionalize_aten_op::call(self, dim, start, length); +at::Tensor LazyNativeFunctions::narrow_copy( + const at::Tensor& self, int64_t dim, int64_t start, int64_t length) { + return at::functionalization::functionalize_aten_op::call(self, dim, start, length); } -at::Tensor LazyNativeFunctions::pixel_shuffle(const at::Tensor & self, int64_t upscale_factor) { - return at::functionalization::functionalize_aten_op::call(self, upscale_factor); +at::Tensor LazyNativeFunctions::pixel_shuffle( + const at::Tensor& self, int64_t upscale_factor) { + return at::functionalization::functionalize_aten_op::call(self, upscale_factor); } -at::Tensor LazyNativeFunctions::pixel_unshuffle(const at::Tensor & self, int64_t downscale_factor) { - return at::functionalization::functionalize_aten_op::call(self, downscale_factor); +at::Tensor LazyNativeFunctions::pixel_unshuffle( + const at::Tensor& self, int64_t downscale_factor) { + return at::functionalization::functionalize_aten_op::call(self, downscale_factor); } -at::Tensor LazyNativeFunctions::select_backward(const at::Tensor & grad_output, at::IntArrayRef input_sizes, int64_t dim, int64_t index) { - return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, dim, index); +at::Tensor LazyNativeFunctions::select_backward( + const at::Tensor& grad_output, at::IntArrayRef input_sizes, int64_t dim, + int64_t index) { + return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, dim, index); } -at::Tensor LazyNativeFunctions::slice_backward(const at::Tensor & grad_output, at::IntArrayRef input_sizes, int64_t dim, int64_t start, int64_t end, int64_t step) { - return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, dim, start, end, step); +at::Tensor LazyNativeFunctions::slice_backward( + const at::Tensor& grad_output, at::IntArrayRef input_sizes, int64_t dim, + int64_t start, int64_t end, int64_t step) { + return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, dim, start, end, step); } -at::Tensor LazyNativeFunctions::diagonal_backward(const at::Tensor & grad_output, at::IntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2) { - return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, offset, dim1, dim2); +at::Tensor LazyNativeFunctions::diagonal_backward( + const at::Tensor& grad_output, at::IntArrayRef input_sizes, int64_t offset, + int64_t dim1, int64_t dim2) { + return at::functionalization::functionalize_aten_op::call(grad_output, input_sizes, offset, dim1, dim2); } -at::Tensor LazyNativeFunctions::_trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim) { - return at::functionalization::functionalize_aten_op::call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim); +at::Tensor LazyNativeFunctions::_trilinear( + const at::Tensor& i1, const at::Tensor& i2, const at::Tensor& i3, + at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, + at::IntArrayRef sumdim, int64_t unroll_dim) { + return at::functionalization::functionalize_aten_op:: + call(i1, i2, i3, expand1, expand2, expand3, sumdim, unroll_dim); } -::std::tuple LazyNativeFunctions::linalg_inv_ex(const at::Tensor & self, bool check_errors) { - return at::functionalization::functionalize_aten_op::call(self, check_errors); +::std::tuple +LazyNativeFunctions::linalg_inv_ex(const at::Tensor& self, bool check_errors) { + return at::functionalization::functionalize_aten_op::call(self, check_errors); } -at::Tensor LazyNativeFunctions::linalg_pinv(const at::Tensor & self, const c10::optional & atol, const c10::optional & rtol, bool hermitian) { - return at::functionalization::functionalize_aten_op::call(self, atol, rtol, hermitian); +at::Tensor LazyNativeFunctions::linalg_pinv( + const at::Tensor& self, const c10::optional& atol, + const c10::optional& rtol, bool hermitian) { + return at::functionalization::functionalize_aten_op::call(self, atol, rtol, hermitian); } // functionalize_aten_op can't handle out= ops directly. // Instead, we can call the composite kernel from core, and copy and mutations back to the inputs. -at::Tensor & LazyNativeFunctions::logsumexp_out(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, at::Tensor& out) { +at::Tensor& LazyNativeFunctions::logsumexp_out( + const at::Tensor& self, at::IntArrayRef dim, bool keepdim, + at::Tensor& out) { auto self_wrapped = at::functionalization::impl::to_functional_tensor(self); auto out_wrapped = at::functionalization::impl::to_functional_tensor(out); // directly call the composite kernel from core. diff --git a/python/torch_mlir/csrc/base_lazy_backend/mlir_node.cpp b/python/torch_mlir/csrc/base_lazy_backend/mlir_node.cpp index 2a56dd0fd..51907c9b4 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/mlir_node.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/mlir_node.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir_node.h" -#include "../utils/exception.h" +#include "utils/exception.h" namespace torch { namespace lazy { @@ -74,7 +74,8 @@ hash_t TorchMlirNode::shapeHash() const { return shape_hash_; } OpKind TorchMlirTensorList::ClassOpKind() { // Note: this OpKind is separate from ltc_ops.h since it would be a circular // import otherwise - static const OpKind tensor_list_opkind = OpKind::Get("lazy_tensors::tensor_list"); + static const OpKind tensor_list_opkind = + OpKind::Get("lazy_tensors::tensor_list"); return tensor_list_opkind; } diff --git a/python/torch_mlir/csrc/base_lazy_backend/mlir_node.h b/python/torch_mlir/csrc/base_lazy_backend/mlir_node.h index 858ab0461..c7e10d8cf 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/mlir_node.h +++ b/python/torch_mlir/csrc/base_lazy_backend/mlir_node.h @@ -18,9 +18,9 @@ #include #include -#include "../utils/debug.h" -#include "../utils/exception.h" #include "mlir_lowering_context.h" +#include "utils/debug.h" +#include "utils/exception.h" namespace torch { namespace lazy { @@ -60,7 +60,6 @@ private: hash_t dag_hash_; }; - // TensorList represents an at::TensorList which is a vector[Tensor] but is also // a first-class IValue and can be fed as a single input to a TS program. It is // much easier to handle TensorLists in Lazy Tensor code if they are represented diff --git a/python/torch_mlir/csrc/base_lazy_backend/mlir_node_lowering.cpp b/python/torch_mlir/csrc/base_lazy_backend/mlir_node_lowering.cpp index 8e504b9e3..a18a20e78 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/mlir_node_lowering.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/mlir_node_lowering.cpp @@ -209,17 +209,17 @@ GenerateClone(torch::jit::Value* val, TorchMlirFunction function) { return cloned.front(); } - -void GenerateCopy(torch::jit::Value* destination, torch::jit::Value* source, TorchMlirFunction function) { - std::vector arguments; - arguments.emplace_back(destination); - arguments.emplace_back(source); - LowerBuiltin( - at::aten::copy_, - c10::ArrayRef(compute_shape_copy(source->type())), function, arguments); +void GenerateCopy( + torch::jit::Value* destination, torch::jit::Value* source, + TorchMlirFunction function) { + std::vector arguments; + arguments.emplace_back(destination); + arguments.emplace_back(source); + LowerBuiltin( + at::aten::copy_, c10::ArrayRef(compute_shape_copy(source->type())), + function, arguments); } - torch::jit::Value* GenerateSlice( torch::jit::Value* base, int64_t dim, int64_t start, int64_t end, int64_t step, TorchMlirFunction function) { @@ -234,8 +234,7 @@ torch::jit::Value* GenerateSlice( at::aten::slice, c10::ArrayRef( compute_shape_slice(base->type(), dim, start, end, step)), - function, - arguments); + function, arguments); CHECK_EQ(selected.size(), 1); return selected.front(); } diff --git a/python/torch_mlir/csrc/base_lazy_backend/shape_inference.cpp b/python/torch_mlir/csrc/base_lazy_backend/shape_inference.cpp index 7e2c4f1b9..2ad1c962d 100644 --- a/python/torch_mlir/csrc/base_lazy_backend/shape_inference.cpp +++ b/python/torch_mlir/csrc/base_lazy_backend/shape_inference.cpp @@ -11,8 +11,8 @@ #include #include -#include "../utils/exception.h" #include "generated/shape_inference.h" +#include "utils/exception.h" namespace torch { namespace lazy { @@ -20,7 +20,7 @@ namespace lazy { // TODO(henrytu): Upstream these shape inference functions to PyTorch in the future. std::vector -compute_shape_div(const at::Tensor& self, const at::Scalar & other) { +compute_shape_div(const at::Tensor& self, const at::Scalar& other) { return {Shape(self.scalar_type(), self.sizes().vec())}; } diff --git a/python/torch_mlir/csrc/utils/debug.h b/python/torch_mlir/csrc/base_lazy_backend/utils/debug.h similarity index 100% rename from python/torch_mlir/csrc/utils/debug.h rename to python/torch_mlir/csrc/base_lazy_backend/utils/debug.h diff --git a/python/torch_mlir/csrc/utils/exception.h b/python/torch_mlir/csrc/base_lazy_backend/utils/exception.h similarity index 100% rename from python/torch_mlir/csrc/utils/exception.h rename to python/torch_mlir/csrc/base_lazy_backend/utils/exception.h diff --git a/python/torch_mlir/csrc/utils/sys_utils.h b/python/torch_mlir/csrc/base_lazy_backend/utils/sys_utils.h similarity index 64% rename from python/torch_mlir/csrc/utils/sys_utils.h rename to python/torch_mlir/csrc/base_lazy_backend/utils/sys_utils.h index 363bc24f6..6cb47895a 100644 --- a/python/torch_mlir/csrc/utils/sys_utils.h +++ b/python/torch_mlir/csrc/base_lazy_backend/utils/sys_utils.h @@ -5,6 +5,15 @@ namespace sys_util { +template +static T GetEnv(const std::string& name, const T& default_value = T(0)) { + const char* env = std::getenv(name.c_str()); + if (!env) { + return default_value; + } + return T(std::atoi(env)); +} + static bool GetEnvBool(const char* name, bool defval) { const char* env = std::getenv(name); if (env == nullptr) { diff --git a/examples/ltc_backend/CMakeLists.txt b/python/torch_mlir/csrc/reference_lazy_backend/CMakeLists.txt similarity index 69% rename from examples/ltc_backend/CMakeLists.txt rename to python/torch_mlir/csrc/reference_lazy_backend/CMakeLists.txt index e08d32d6d..8585aaf73 100644 --- a/examples/ltc_backend/CMakeLists.txt +++ b/python/torch_mlir/csrc/reference_lazy_backend/CMakeLists.txt @@ -30,22 +30,18 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/python ) link_directories("${TORCH_INSTALL_PREFIX}/lib") -link_directories(${CMAKE_CURRENT_SOURCE_DIR}/ltc_backend/lib) -add_link_options(-Wl,-rpath,$ORIGIN/ltc_backend/lib) +link_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib) +add_link_options(-Wl,-rpath,$ORIGIN/lib) -file(GLOB LTC_BACKEND_CSRC CONFIGURE_DEPENDS - "ltc_backend/csrc/*.h" - "ltc_backend/csrc/*.cc" - "ltc_backend/csrc/*.cpp" - "ltc_backend/csrc/*/*.h" - "ltc_backend/csrc/*/*.cc" - "ltc_backend/csrc/*/*.cpp" +set(REFERENCE_LAZY_BACKEND_CSRC + backend_impl.cpp + reference_lazy_backend_pybind.cpp ) -add_library(example_mlir_ltc_backend SHARED ${LTC_BACKEND_CSRC}) -add_dependencies(example_mlir_ltc_backend +add_library(reference_lazy_backend SHARED ${REFERENCE_LAZY_BACKEND_CSRC}) +add_dependencies(reference_lazy_backend torch_mlir_ltc_backend ) -target_link_libraries(example_mlir_ltc_backend +target_link_libraries(reference_lazy_backend ${TORCH_LIBRARIES} ${Python3_LIBRARIES} torch_python @@ -53,9 +49,9 @@ target_link_libraries(example_mlir_ltc_backend ) message(STATUS "TORCH_CXXFLAGS=${TORCH_CXXFLAGS} -Wno-pedantic") -set_target_properties(example_mlir_ltc_backend PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/ltc_backend/" - OUTPUT_NAME _EXAMPLE_MLIR_BACKEND +set_target_properties(reference_lazy_backend PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${TORCH_MLIR_PYTHON_PACKAGES_DIR}/torch_mlir/torch_mlir/reference_lazy_backend" + OUTPUT_NAME _REFERENCE_LAZY_BACKEND PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" CXX_VISIBILITY_PRESET "hidden" diff --git a/examples/ltc_backend/ltc_backend/__init__.py b/python/torch_mlir/csrc/reference_lazy_backend/__init__.py similarity index 100% rename from examples/ltc_backend/ltc_backend/__init__.py rename to python/torch_mlir/csrc/reference_lazy_backend/__init__.py diff --git a/examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.cpp b/python/torch_mlir/csrc/reference_lazy_backend/backend_impl.cpp similarity index 76% rename from examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.cpp rename to python/torch_mlir/csrc/reference_lazy_backend/backend_impl.cpp index e81a3486d..8599d6db4 100644 --- a/examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.cpp +++ b/python/torch_mlir/csrc/reference_lazy_backend/backend_impl.cpp @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include "backend_impl.h" @@ -25,8 +25,8 @@ using namespace torch::lazy; namespace torch { namespace lazy { -struct ExampleMlirBackendDeviceType : public BackendDeviceType { - ExampleMlirBackendDeviceType(std::string device_type) +struct ReferenceLazyBackendDeviceType : public BackendDeviceType { + ReferenceLazyBackendDeviceType(std::string device_type) : device_type_(device_type) {} std::string toString() const override { return device_type_; } @@ -34,9 +34,9 @@ struct ExampleMlirBackendDeviceType : public BackendDeviceType { std::string device_type_; }; -class ExampleMlirBackendImpl : public torch::lazy::TorchMlirBackendImpl { +class ReferenceLazyBackendImpl : public torch::lazy::TorchMlirBackendImpl { public: - ExampleMlirBackendImpl() : default_device_type_("Magic") {} + ReferenceLazyBackendImpl() : default_device_type_("Magic") {} /** * Configuration @@ -48,9 +48,9 @@ public: /** * Lowering, Compilation, Execution * */ - std::vector - GetCompilationDevices(const std::string &device, - c10::ArrayRef devices) const override { + std::vector GetCompilationDevices( + const std::string& device, + c10::ArrayRef devices) const override { return std::vector(devices.begin(), devices.end()); }; @@ -59,7 +59,7 @@ public: PRINT_FUNCTION(); // Vendor backend specific lowering can be exec here before returning. - for (const auto &instance : instances) { + for (const auto& instance : instances) { // Store computation instance for external access after compilation. GetLatestComputation() = instance; } @@ -70,17 +70,18 @@ public: return instances; } - std::vector - ExecuteComputation(torch::lazy::ComputationPtr computation, - c10::ArrayRef arguments, - const BackendDevice &device) const override { + std::vector ExecuteComputation( + torch::lazy::ComputationPtr computation, + c10::ArrayRef arguments, + const BackendDevice& device) const override { PRINT_FUNCTION(); // `arguments` maps 1:1 with the parameters in the generated MLIR. In this // function, we will generate a list of BackendData that corresponds to the // return values in the MLIR. - auto mlir_computation = static_cast(computation.get()); + auto mlir_computation = + static_cast(computation.get()); // Vendor backend specific execution can be inserted here. // @@ -91,7 +92,7 @@ public: // https://github.com/pytorch/pytorch/blob/master/torch/csrc/lazy/ts_backend/ts_backend_impl.cpp torch::jit::GraphExecutor graph_executor(mlir_computation->graph(), ""); std::vector stack; - for (const auto &argument : arguments) { + for (const auto& argument : arguments) { const auto mlir_data = std::static_pointer_cast(argument); if (mlir_data->mlir_info()->scalar.has_value()) { @@ -128,7 +129,7 @@ public: } void SetDefaultDeviceType(std::string device_type) { - default_device_type_ = ExampleMlirBackendDeviceType(device_type); + default_device_type_ = ReferenceLazyBackendDeviceType(device_type); } /** @@ -146,22 +147,22 @@ public: } private: - ExampleMlirBackendDeviceType default_device_type_; + ReferenceLazyBackendDeviceType default_device_type_; }; -BackendImplInterface *GetExampleMlirBackendImpl() { - static ExampleMlirBackendImpl *example_mlir_backend_impl = - new ExampleMlirBackendImpl(); - return example_mlir_backend_impl; +BackendImplInterface* GetReferenceLazyBackendImpl() { + static ReferenceLazyBackendImpl* reference_lazy_backend_impl = + new ReferenceLazyBackendImpl(); + return reference_lazy_backend_impl; } -void InitExampleMlirBackend() { +void InitReferenceLazyBackend() { at::RegisterTorchMlirLazyNativeFunctions(); static std::unique_ptr g_registrar; - g_registrar.reset(new BackendRegistrar(GetExampleMlirBackendImpl())); + g_registrar.reset(new BackendRegistrar(GetReferenceLazyBackendImpl())); } -ComputationPtr &GetLatestComputation() { +ComputationPtr& GetLatestComputation() { // Store the computation from the most recent compile. static ComputationPtr computation; return computation; diff --git a/examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.h b/python/torch_mlir/csrc/reference_lazy_backend/backend_impl.h similarity index 84% rename from examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.h rename to python/torch_mlir/csrc/reference_lazy_backend/backend_impl.h index 4c915fa9f..6366fe9fd 100644 --- a/examples/ltc_backend/ltc_backend/csrc/backend/backend_impl.h +++ b/python/torch_mlir/csrc/reference_lazy_backend/backend_impl.h @@ -19,11 +19,11 @@ TORCH_API void RegisterTorchMlirLazyNativeFunctions(); namespace torch { namespace lazy { -torch::lazy::BackendImplInterface *GetExampleMlirBackendImpl(); +torch::lazy::BackendImplInterface* GetReferenceLazyBackendImpl(); -void InitExampleMlirBackend(); +void InitReferenceLazyBackend(); -ComputationPtr &GetLatestComputation(); +ComputationPtr& GetLatestComputation(); } // namespace lazy } // namespace torch diff --git a/examples/ltc_backend/ltc_backend/csrc/example_mlir_backend_pybind.cpp b/python/torch_mlir/csrc/reference_lazy_backend/reference_lazy_backend_pybind.cpp similarity index 77% rename from examples/ltc_backend/ltc_backend/csrc/example_mlir_backend_pybind.cpp rename to python/torch_mlir/csrc/reference_lazy_backend/reference_lazy_backend_pybind.cpp index ff1aa7666..e453cbcf7 100644 --- a/examples/ltc_backend/ltc_backend/csrc/example_mlir_backend_pybind.cpp +++ b/python/torch_mlir/csrc/reference_lazy_backend/reference_lazy_backend_pybind.cpp @@ -1,4 +1,4 @@ -//===- example_mlir_backend_pybind.cpp ------------------------------------===// +//===- reference_lazy_backend_pybind.cpp ----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,13 +11,13 @@ #include "torch/csrc/lazy/backend/backend_interface.h" #include +#include #include #include #include -#include "backend/backend_impl.h" -#include "utils/sys_utils.h" +#include "backend_impl.h" namespace py = pybind11; @@ -27,20 +27,20 @@ bool verbose = sys_util::GetEnv("VERBOSE", false); struct NoGilSection { NoGilSection() : state(PyEval_SaveThread()) {} ~NoGilSection() { PyEval_RestoreThread(state); } - PyThreadState *state = nullptr; + PyThreadState* state = nullptr; }; /** * @brief Install the plugin */ void Initialize() { - // Initialize the Example MLIR LTC Backend - torch::lazy::InitExampleMlirBackend(); + // Initialize the Reference Lazy Backend + torch::lazy::InitReferenceLazyBackend(); // sanity check - const torch::lazy::BackendImplInterface *mlir_backend = - torch::lazy::GetExampleMlirBackendImpl(); - const torch::lazy::BackendImplInterface *lazy_backend = + const torch::lazy::BackendImplInterface* mlir_backend = + torch::lazy::GetReferenceLazyBackendImpl(); + const torch::lazy::BackendImplInterface* lazy_backend = torch::lazy::getBackend(); if (lazy_backend != mlir_backend) { std::cout << "Failed to initialize MLIR Lazy Backend" << std::endl; @@ -62,14 +62,14 @@ void Shutdown() { } } // anonymous namespace -PYBIND11_MODULE(_EXAMPLE_MLIR_BACKEND, m) { +PYBIND11_MODULE(_REFERENCE_LAZY_BACKEND, m) { py::class_(m, "TorchMlirComputation") .def("to_string", &torch::lazy::TorchMlirComputation::to_string) .def("debug_string", &torch::lazy::TorchMlirComputation::debug_string); - m.doc() = ("pybind11 for example MLIR LTC backend."); + m.doc() = ("pybind11 for the Reference Lazy backend."); m.def("get_latest_computation", []() { - auto computation = static_cast( + auto computation = static_cast( torch::lazy::GetLatestComputation().get()); return py::cast(computation); }); diff --git a/python/torch_mlir_e2e_test/torchscript/configs/lazy_tensor_core.py b/python/torch_mlir_e2e_test/torchscript/configs/lazy_tensor_core.py index 95598a447..44c067b71 100644 --- a/python/torch_mlir_e2e_test/torchscript/configs/lazy_tensor_core.py +++ b/python/torch_mlir_e2e_test/torchscript/configs/lazy_tensor_core.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # Also available under a BSD-style license. See LICENSE. -import ltc_backend.ltc_backend._EXAMPLE_MLIR_BACKEND as ltc_backend +import torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND as lazy_backend import torch from torch.utils._pytree import tree_map @@ -20,7 +20,7 @@ class LazyTensorCoreTestConfig(TestConfig): def __init__(self): super().__init__() - ltc_backend._initialize() + lazy_backend._initialize() def compile(self, program: torch.nn.Module) -> torch.nn.Module: return program.to('lazy')