Delete old PyTorch 1.3 type dispatch oriented code paths.

* We aren't quite at e2e parity, but we aren't going back and the old path is bit-rotted.
pull/115/head
Stella Laurenzo 2020-11-12 22:27:05 -08:00
parent e359167562
commit 47ac80491c
72 changed files with 22 additions and 34570 deletions

@@ -1,48 +0,0 @@
FROM nvcr.io/nvidia/pytorch:19.10-py3
MAINTAINER Stephen Neuendorffer <stephenn@xilinx.com>
#
# get the basics
#
USER root
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update
RUN apt-get install software-properties-common --assume-yes
RUN apt-get install wget curl unzip libxml2-dev --assume-yes
RUN apt-get install autoconf libtool g++ g++-multilib --assume-yes
RUN apt-get install build-essential python3 cmake git gitk --assume-yes
RUN apt-get install clang-8 lld-8 ninja-build --assume-yes
RUN apt-get install libncurses5-dev --assume-yes
RUN /opt/conda/bin/conda install matplotlib pybind11
#torchvision
ENV LD_LIBRARY_PATH "${LD_LIBRARY_PATH}:/opt/conda/lib"
# Rebuild pytorch
WORKDIR /opt/pytorch/pytorch
# this is the recommended rebuild command from NVIDIA
# with the cleanup of the build area omitted.
RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5+PTX" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
NCCL_INCLUDE_DIR="/usr/include/" \
NCCL_LIB_DIR="/usr/lib/" \
python setup.py install
WORKDIR /workspace
# Additional packages for building npcomp
RUN apt-get install clang-10 lld-10 --assume-yes
RUN conda install -c gaiar nnpack
# Make it possible to symbolize stack traces in crashes.
RUN ln -s /usr/bin/llvm-symbolizer-10 /usr/bin/llvm-symbolizer
# Additional env for building npcomp and running tests.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda-10.1/compat/lib.real"
ENV CC=clang-10
ENV CXX=clang++-10
ENV CXXFLAGS "-I/opt/conda/include"
ENV LDFLAGS "-fuse-ld=/usr/bin/ld.lld-10 -L/opt/conda/lib"

@@ -1,60 +0,0 @@
# Deprecated PyTorch 1.3 based build
These instructions are retained for the transition. Refer to the top-level README for up-to-date instructions.
### PyTorch 1.3 - ATen pseudo-device type dispatch
The currently functional approach to PyTorch integration uses an ATen pseudo
device for program capture. It is activated by including the PyTorch cmake
path and setting `-DNPCOMP_ENABLE_TORCH_TYPE_DISPATCH=ON`. This approach has a
very fragile dependency on specific PyTorch revisions in the ~1.3 era and
currently must be built via the docker image in `docker/pytorch-1.3`.
We are migrating to newer approaches that build with more recent PyTorch
versions, but these are not yet functional (see below).
Docker container setup:
```shell
# One of the maintainers does periodically push new images. To use one of these,
# skip the build step and use:
# BUILD_IMAGE_TAG="stellaraccident/npcomp:build-pytorch-1.3"
# Since we are not planning to support this branch long term, this process is
# entirely ad-hoc at present and geared for project maintainers and build bots
# to be able to make progress.
# See https://hub.docker.com/repository/docker/stellaraccident/npcomp
BUILD_IMAGE_TAG="local/npcomp:build-pytorch-1.3"
# Build the docker image (rebuilds PyTorch, so takes quite some time).
docker build docker/pytorch-1.3 --tag $BUILD_IMAGE_TAG
# Docker workflow (or use your own preferences).
# Create a volume for npcomp build artifacts.
docker volume create npcomp-pytorch-1.3-build
# Run the container, mounting /npcomp to the source directory and the volume
# above to the /build directory. The source directory is mounted read-only to
# avoid the container putting root owned files there.
# Replace `$HOME/src/mlir-npcomp` with an appropriate path to where the project
# is checked out.
docker run \
--mount type=bind,source=$HOME/src/mlir-npcomp,target=/npcomp,readonly \
--mount source=npcomp-pytorch-1.3-build,target=/build \
--rm -it $BUILD_IMAGE_TAG /bin/bash
```
```shell
# From within the docker image.
# Install MLIR and configure project.
cd /npcomp
BUILD_DIR=/build ./build_tools/install_mlir.sh
BUILD_DIR=/build ./build_tools/cmake_configure.sh \
-DCMAKE_PREFIX_PATH=/opt/conda/lib/python3.6/site-packages/torch/share/cmake \
-DNPCOMP_ENABLE_TORCH_TYPE_DISPATCH=ON
# Build.
cd /build
ninja
ninja check-npcomp
ninja check-frontends-pytorch
```

@@ -1,24 +1,7 @@
#-------------------------------------------------------------------------------
# Options and settings
#-------------------------------------------------------------------------------
option(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH "Enables the legacy ATen Type dispatch code path" OFF)
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
add_compile_definitions(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
message(STATUS "Legacy Torch type dispatch mechanism enabled")
endif()
#-------------------------------------------------------------------------------
# Subdirectories
#-------------------------------------------------------------------------------
# TODO: This sub-directory does not need to be gated on the type dispatch
# mechanism, but it presently has some dependencies on an older pytorch version
# and is being excluded until those can be resolved.
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
add_subdirectory(lib)
endif()
add_subdirectory(csrc)
add_subdirectory(python)
add_subdirectory(test)

@@ -1,8 +1,4 @@
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
add_subdirectory(type_dispatch)
else()
add_subdirectory(c10_dispatch)
endif()
add_subdirectory(builder)
include(NpcompPython)
@@ -16,25 +12,14 @@ include_directories(
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")
set(torch_mlir_optional_libraries)
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
list(APPEND torch_mlir_optional_libraries
npcomp_torch_type_dispatch_bindings
)
else()
list(APPEND torch_mlir_optional_libraries
npcomp_torch_c10_dispatch_bindings
)
endif()
add_library(NPCOMPTorchMLIRExt SHARED
init_python_bindings.cpp
)
target_link_libraries(NPCOMPTorchMLIRExt
${TORCH_LIBRARIES}
${PYTHON_LIBRARIES}
${torch_mlir_optional_libraries}
torch_python
npcomp_torch_builder_bindings
# NPCOMP shared library.
NPCOMP

@@ -7,7 +7,7 @@ include_directories(
${PYTHON_INCLUDE_DIRS}
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")
add_library(npcomp_torch_c10_dispatch_bindings
add_library(npcomp_torch_builder_bindings
acap_dispatch.cpp
debug.cpp
func_builder.cpp
@@ -15,7 +15,7 @@ add_library(npcomp_torch_c10_dispatch_bindings
python_bindings.cpp
)
target_link_libraries(npcomp_torch_c10_dispatch_bindings
target_link_libraries(npcomp_torch_builder_bindings
${TORCH_LIBRARIES}
${PYTHON_LIBRARIES}
torch_python

@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_ACAP_DISPATCH_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_ACAP_DISPATCH_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_ACAP_DISPATCH_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_ACAP_DISPATCH_H
#include <list>
#include <memory>

@@ -5,6 +5,9 @@
//
//===----------------------------------------------------------------------===//
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H
#include <string>
namespace torch_mlir {
@@ -20,3 +23,5 @@ void debugTrace(const std::string &message);
void enableDebugTraceToStderr();
} // namespace torch_mlir
#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H

@@ -5,8 +5,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_FUNC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_FUNC_BUILDER_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H
#include "mlir-c/IR.h"
#include "llvm/ADT/DenseMap.h"
@@ -169,4 +169,4 @@ private:
} // namespace torch_mlir
#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H

@@ -5,8 +5,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_H
#include "../pybind.h"

@@ -126,7 +126,9 @@ py::list GetRegisteredOps() {
return results;
}
void InitModuleBindings(py::module &m) {
} // namespace
void torch_mlir::InitBuilderBindings(py::module &m) {
m.def("debug_trace_to_stderr", &enableDebugTraceToStderr);
py::class_<AcapController, std::shared_ptr<AcapController>>(m,
@@ -139,8 +141,3 @@ void InitModuleBindings(py::module &m) {
ModuleBuilder::bind(m);
}
} // namespace
void torch_mlir::InitC10DispatchBindings(py::module &m) {
InitModuleBindings(m);
}

@@ -148,12 +148,7 @@ void InitModuleBindings(py::module &m) {
void InitBindings(py::module &m) {
InitModuleBindings(m);
#if defined(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
InitTypeDispatchBindings(m);
#else
InitC10DispatchBindings(m);
#endif
InitBuilderBindings(m);
}
} // namespace torch_mlir
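
Pieced together from the two hunks above, the binding setup after this change reduces to roughly the sketch below. This is an orientation aid, not a verbatim copy of the new files: the `AcapController` registration is elided because the diff truncates its arguments, and the includes are assumed from the project's `pybind.h`.

```cpp
// Orientation sketch only (assembled from the hunks above, not verbatim).
#include "pybind.h"  // declares InitBindings / InitBuilderBindings

namespace torch_mlir {

// Top-level module init: one unconditional path, no dispatch #ifdef.
void InitBindings(py::module &m) {
  InitModuleBindings(m);   // pre-existing top-level bindings
  InitBuilderBindings(m);  // builder/acap bindings (formerly c10_dispatch)
}

} // namespace torch_mlir

// Builder bindings: the former anonymous-namespace InitModuleBindings,
// now exposed directly instead of being wrapped by InitC10DispatchBindings.
void torch_mlir::InitBuilderBindings(py::module &m) {
  m.def("debug_trace_to_stderr", &enableDebugTraceToStderr);
  // ... AcapController registration (arguments truncated in the diff) ...
  ModuleBuilder::bind(m);
}
```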

@@ -15,13 +15,8 @@ namespace torch_mlir {
// Perform top-level initialization for the module.
void InitBindings(pybind11::module &m);
// Adds bindings related to the type-dispatch program capture mechanism.
// Only defined if NPCOMP_ENABLE_TORCH_TYPE_DISPATCH (optional feature).
void InitTypeDispatchBindings(pybind11::module &m);
// Adds bindings related to the c10-dispatch program capture mechanism.
// Only defined if !NPCOMP_ENABLE_TORCH_TYPE_DISPATCH (default).
void InitC10DispatchBindings(pybind11::module &m);
// Adds bindings related to building modules.
void InitBuilderBindings(pybind11::module &m);
} // namespace torch_mlir

@@ -1,31 +0,0 @@
include_directories(
${TORCH_INCLUDE_DIRS}
${TORCH_INSTALL_PREFIX}/include/TH
${TORCH_INSTALL_PREFIX}/include/THC/opt/pytorch/pytorch
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
${PYTHON_INCLUDE_DIRS}
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")
add_library(npcomp_torch_type_dispatch_bindings
aten_mlir_bridge.cpp
aten_mlir_type.cpp
aten_mlir_type_default.cpp
device.cpp
ir.cpp
jit.cpp
mlir_gen.cpp
python_bindings.cpp
tensor.cpp
tensor_impl.cpp
torch_util.cpp
)
get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS)
target_link_libraries(npcomp_torch_type_dispatch_bindings
NPCOMPATenDialect
${TORCH_LIBRARIES}
${mlir_libs}
${PYTHON_LIBRARIES}
torch_python
)

@@ -1,5 +0,0 @@
# Type dispatch MLIR capture interface
This directory contains code related to the legacy ATen "type dispatch"
interface (which uses a large table of virtual functions). It is being
superseded by the c10 dispatcher mechanism.

@@ -1,192 +0,0 @@
//===- aten_mlir_bridge.cpp -------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// Structured similarly to code from git@github.com:pytorch/xla.git
#include "aten_mlir_bridge.h"
#include <string>
#include <vector>
#include "device.h"
#include "tensor_impl.h"
namespace torch_mlir {
namespace bridge {
namespace {
class AtenMLIRDeviceMapper {
public:
static AtenMLIRDeviceMapper *Get();
size_t GetDeviceOrdinal(const Device &device) const {
auto it = devices_ordinals_.find(device);
assert(it != devices_ordinals_.end());
return it->second;
}
const Device &GetDeviceFromOrdinal(size_t ordinal) const {
return devices_.at(ordinal);
}
private:
AtenMLIRDeviceMapper() {
std::vector<std::string> local_devices{"mlir:0", "mlir:1", "mlir:2"};
for (auto &device_str : local_devices) {
devices_.emplace_back(device_str);
devices_ordinals_[devices_.back()] = devices_.size() - 1;
}
}
std::vector<Device> devices_;
std::map<Device, size_t> devices_ordinals_;
};
AtenMLIRDeviceMapper *AtenMLIRDeviceMapper::Get() {
static AtenMLIRDeviceMapper *device_mapper = new AtenMLIRDeviceMapper();
return device_mapper;
}
} // namespace
c10::optional<MLIRTensor> TryGetMLIRTensor(const at::Tensor &tensor) {
MLIRTensorImpl *impl =
dynamic_cast<MLIRTensorImpl *>(tensor.unsafeGetTensorImpl());
if (impl == nullptr) {
return c10::nullopt;
}
return impl->tensor();
}
MLIRTensor GetMLIRTensor(const at::Tensor &tensor) {
auto xtensor = TryGetMLIRTensor(tensor);
assert(xtensor && "Input tensor is not an MLIR tensor");
return *xtensor;
}
MLIRTensor GetOrCreateMLIRTensor(const at::Tensor &tensor,
const Device &device) {
if (!tensor.defined()) {
return MLIRTensor();
}
auto xtensor = TryGetMLIRTensor(tensor);
return xtensor ? *xtensor : MLIRTensor::Create(tensor, device);
}
std::vector<at::Tensor> MLIRCreateTensorList(const at::TensorList &tensors) {
std::vector<at::Tensor> aten_device_tensors(tensors.size());
std::vector<MLIRTensor> device_tensors;
std::vector<bool> to_translate(tensors.size());
for (size_t i = 0; i < tensors.size(); ++i) {
const at::Tensor &tensor = tensors[i];
if (tensor.defined()) {
auto xtensor = TryGetMLIRTensor(tensor);
if (xtensor) {
to_translate[i] = true;
device_tensors.push_back(*xtensor);
} else {
aten_device_tensors[i] = tensor;
}
}
}
for (size_t i = 0, defined_pos = 0; i < tensors.size(); ++i) {
if (to_translate[i]) {
aten_device_tensors[i] =
std::move(device_tensors[defined_pos++].ToTensor());
}
}
return aten_device_tensors;
}
c10::optional<Device> GetMLIRDevice(const at::TensorList &tensors) {
for (const auto &tensor : tensors) {
auto device = GetMLIRDevice(tensor);
if (device) {
return device;
}
}
return c10::nullopt;
}
c10::optional<Device> GetMLIRDevice(const at::TensorOptions &tensor_options) {
if (!tensor_options.has_device()) {
return c10::nullopt;
}
return GetMLIRDevice(tensor_options.device());
}
c10::optional<Device> GetMLIRDevice(const c10::Device &device) {
if (device.type() != at::kXLA) {
return c10::nullopt;
}
return AtenDeviceToMLIRDevice(device);
}
c10::optional<Device> GetMLIRDevice(const at::Tensor &tensor) {
auto xtensor = TryGetMLIRTensor(tensor);
if (!xtensor) {
return c10::nullopt;
}
return xtensor->GetDevice();
}
Device AtenDeviceToMLIRDevice(const c10::Device &device) {
assert(device.type() == at::kXLA);
int ordinal = device.has_index() ? device.index() : -1;
if (ordinal < 0) {
c10::Device current_device = MLIRTensorImpl::GetCurrentAtenDevice();
if (current_device.has_index()) {
ordinal = current_device.index();
}
}
if (ordinal < 0) {
return *GetDefaultDevice();
}
return AtenMLIRDeviceMapper::Get()->GetDeviceFromOrdinal(ordinal);
}
c10::Device MLIRDeviceToAtenDevice(const Device &device) {
// TODO: define our own device and stop hijacking the xla device.
return c10::Device(at::kXLA,
AtenMLIRDeviceMapper::Get()->GetDeviceOrdinal(device));
}
at::Tensor MLIRToAtenTensor(MLIRTensor device_tensor,
const at::TensorOptions &tensor_options) {
if (tensor_options.has_device()) {
assert(tensor_options.device().type() != at::kXLA);
}
at::Tensor tensor = device_tensor.ToTensor();
// We need to copy the tensor since it is cached within the MLIRTensor, and
// returning it directly might expose it to in place changes.
return tensor.to(tensor_options, /*non_blocking=*/false, /*copy=*/true);
}
at::Tensor AtenFromMLIRTensor(MLIRTensor device_tensor) {
assert(!device_tensor.is_null());
at::Tensor ret =
at::Tensor(c10::make_intrusive<MLIRTensorImpl>(std::move(device_tensor)));
return ret;
}
at::Tensor CreateMLIRTensor(at::Tensor tensor,
const c10::optional<Device> &device) {
if (tensor.defined() && device) {
MLIRTensor device_tensor = MLIRTensor::Create(std::move(tensor), *device);
tensor = AtenFromMLIRTensor(device_tensor);
}
return tensor;
}
} // namespace bridge
} // namespace torch_mlir
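
One orientation note (mine, not from the diff): as the TODO above concedes, this legacy path reused PyTorch's XLA device type instead of registering its own. A minimal sketch of that mapping, assuming the headers removed elsewhere in this commit:

```cpp
// Sketch, not repository code: mapping the hijacked at::kXLA device onto the
// project's Device via AtenDeviceToMLIRDevice (defined above).
#include "aten_mlir_bridge.h"
#include <ATen/ATen.h>

torch_mlir::Device toMlirDevice() {
  c10::Device aten_dev(at::kXLA, /*index=*/1);                  // "xla:1"
  return torch_mlir::bridge::AtenDeviceToMLIRDevice(aten_dev);  // -> MLIR:1
}
```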

@@ -1,61 +0,0 @@
//===- aten_mlir_bridge.h ---------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
// Structured similarly to code from git@github.com:pytorch/xla.git
// This file implements a bridge which moves data back and forth from torch
// tensors (at::Tensor) to MLIRTensor, which represents a tensor associated
// with our virtual 'MLIR' device.
#include "device.h"
#include "tensor.h"
#include <ATen/Device.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
namespace torch_mlir {
namespace bridge {
c10::optional<MLIRTensor> TryGetMLIRTensor(const at::Tensor &tensor);
// Return an MLIR tensor that is computed the same way as the given at::Tensor
MLIRTensor GetMLIRTensor(const at::Tensor &tensor);
MLIRTensor GetOrCreateMLIRTensor(const at::Tensor &tensor,
const Device &device);
// Creates a vector of at::Tensor objects extracted from a list of MLIR tensors.
std::vector<at::Tensor> MLIRCreateTensorList(const at::TensorList &tensors);
c10::optional<Device> GetMLIRDevice(const at::TensorList &tensors);
c10::optional<Device> GetMLIRDevice(const at::TensorOptions &tensor_options);
c10::optional<Device> GetMLIRDevice(const c10::Device &device);
c10::optional<Device> GetMLIRDevice(const at::Tensor &tensor);
Device AtenDeviceToMLIRDevice(const c10::Device &device);
c10::Device MLIRDeviceToAtenDevice(const Device &device);
at::Tensor MLIRToAtenTensor(MLIRTensor device_tensor,
const at::TensorOptions &tensor_options);
// Create an Aten tensor with MLIR type id from MLIRTensor
at::Tensor AtenFromMLIRTensor(MLIRTensor device_tensor);
// Creates an MLIR tensor holding the data in tensor, on the given device.
at::Tensor CreateMLIRTensor(at::Tensor tensor,
const c10::optional<Device> &device);
} // namespace bridge
} // namespace torch_mlir
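
To make the deleted surface concrete, here is a hypothetical round trip through the bridge declared above; this is my sketch against the removed headers, not code from the repository:

```cpp
// Hypothetical usage of the removed bridge API (signatures per the header above).
#include "aten_mlir_bridge.h"
#include "device.h"
#include <ATen/ATen.h>

at::Tensor roundTrip(const at::Tensor &cpu_tensor) {
  torch_mlir::Device dev("mlir:0");  // the virtual 'MLIR' device
  // Wrap the tensor so it is backed by MLIRTensorImpl on the pseudo-device.
  at::Tensor staged = torch_mlir::bridge::CreateMLIRTensor(cpu_tensor, dev);
  // Copy the cached data back into a plain tensor with the original options.
  return torch_mlir::bridge::MLIRToAtenTensor(
      torch_mlir::bridge::GetMLIRTensor(staged), cpu_tensor.options());
}
```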

@@ -1,669 +0,0 @@
//===- aten_mlir_type.cpp ---------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// Structured similarly to code from git@github.com:pytorch/xla.git
#include "llvm/Support/Debug.h"
#include "aten_mlir_bridge.h"
#include "aten_mlir_type.h"
#include "aten_mlir_type_default.h"
#include "ir.h"
#include "tensor_impl.h"
#include "torch_util.h"
#include <mutex>
#define DEBUG_TYPE "torch_mlir"
namespace torch_mlir {
namespace {
struct MLIROptions {
MLIROptions(const at::TensorOptions &options,
c10::optional<Device> device_opt = c10::nullopt,
c10::optional<at::ScalarType> scalar_type_opt = c10::nullopt)
: device(std::move(device_opt)), scalar_type(std::move(scalar_type_opt)) {
if (options.has_device()) {
device = bridge::AtenDeviceToMLIRDevice(options.device());
}
if (options.has_dtype()) {
scalar_type = c10::typeMetaToScalarType(options.dtype());
}
}
Device get_device() const { return device ? *device : *GetDefaultDevice(); }
at::ScalarType
get_scalar_type(at::ScalarType defval = at::ScalarType::Float) const {
return scalar_type ? *scalar_type : defval;
}
c10::optional<Device> device;
c10::optional<at::ScalarType> scalar_type;
};
std::tuple<MLIRTensor, MLIRTensor>
GetPromotedMLIRTensorsForBinaryOp(const at::Tensor &self,
const at::Tensor &other) {
// this requires slightly newer than pytorch 1.3.0, disable for now.
// at::ScalarType dtype = at::result_type(self, other);
MLIRTensor tensor1 = bridge::GetMLIRTensor(self);
MLIRTensor tensor2 =
bridge::GetOrCreateMLIRTensor(other, tensor1.GetDevice());
// tensor1.SetScalarType(dtype);
// tensor2.SetScalarType(dtype);
return std::make_tuple(tensor1, tensor2);
}
void AtenInitialize() {
RegisterAtenTypeFunctions();
ir::RegisterAtenIR();
}
} // namespace
void ATenMLIRType::InitializeAtenBindings() {
static std::once_flag once;
std::call_once(once, []() { AtenInitialize(); });
}
at::Tensor ATenMLIRType::_adaptive_avg_pool2d(const at::Tensor &self,
at::IntArrayRef output_size) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(MLIRTensor::_adaptive_avg_pool2d(
bridge::GetMLIRTensor(self), output_size));
}
at::Tensor
ATenMLIRType::_adaptive_avg_pool2d_backward(const at::Tensor &grad_output,
const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::_adaptive_avg_pool2d_backward(
grad_output_tensor, input_tensor));
}
at::Tensor ATenMLIRType::add(const at::Tensor &self, const at::Tensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
return bridge::AtenFromMLIRTensor(
MLIRTensor::add(std::get<0>(tensors), std::get<1>(tensors), alpha));
}
at::Tensor &ATenMLIRType::add_(at::Tensor &self, const at::Tensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::add_(std::get<0>(tensors), std::get<1>(tensors), alpha));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
at::Tensor ATenMLIRType::addmm(const at::Tensor &self, const at::Tensor &mat1,
const at::Tensor &mat2, at::Scalar beta,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensor = bridge::GetMLIRTensor(self);
return bridge::AtenFromMLIRTensor(MLIRTensor::addmm(
tensor, bridge::GetOrCreateMLIRTensor(mat1, tensor.GetDevice()),
bridge::GetOrCreateMLIRTensor(mat2, tensor.GetDevice()), beta, alpha));
}
at::Tensor ATenMLIRType::as_strided(const at::Tensor &self,
at::IntArrayRef size,
at::IntArrayRef stride,
c10::optional<int64_t> storage_offset) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(MLIRTensor::as_strided(
bridge::GetMLIRTensor(self), size, stride, storage_offset));
}
at::Tensor ATenMLIRType::clone(const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::clone(bridge::GetMLIRTensor(self)));
}
at::Tensor &ATenMLIRType::copy_(at::Tensor &self, const at::Tensor &src,
bool non_blocking) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto self_tensor = bridge::TryGetMLIRTensor(self);
auto src_tensor = bridge::TryGetMLIRTensor(src);
if (!src_tensor) {
assert(self_tensor);
self_tensor->SetTensor(util::CopyTensor(src, self.scalar_type()));
} else if (!self_tensor) {
at::Tensor t = src_tensor->ToTensor();
const_cast<at::Tensor &>(self).unsafeGetTensorImpl()->shallow_copy_from(
t.getIntrusivePtr());
} else {
MLIRTensor::copy_(*self_tensor, *src_tensor);
}
return self;
}
at::Tensor ATenMLIRType::_copy_from(const at::Tensor &self,
const at::Tensor &dst, bool non_blocking) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
std::vector<at::Tensor> tensors = {self};
auto device_tensors = bridge::MLIRCreateTensorList(tensors);
// Hack in an overwrite of a const tensor.
at::Tensor t = util::CopyTensor(device_tensors.front(), dst.scalar_type());
const_cast<at::Tensor &>(dst).unsafeGetTensorImpl()->shallow_copy_from(
t.getIntrusivePtr());
return dst;
}
std::tuple<at::Tensor, at::Tensor, at::Tensor>
ATenMLIRType::convolution_backward_overrideable(
const at::Tensor &grad_output, const at::Tensor &input,
const at::Tensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
int64_t groups, std::array<bool, 3> output_mask) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(input);
auto weight_tensor =
bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
auto ret = MLIRTensor::convolution_backward(
grad_output_tensor, input_tensor, weight_tensor, stride, padding,
dilation, transposed, output_padding, groups, output_mask);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)),
bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}
at::Tensor ATenMLIRType::convolution_overrideable(
const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
bool transposed, at::IntArrayRef output_padding, int64_t groups) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(input);
auto weight_tensor =
bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
auto bias_tensor =
bias.defined()
? bridge::GetOrCreateMLIRTensor(bias, input_tensor.GetDevice())
: bridge::GetOrCreateMLIRTensor(
at::zeros(at::IntArrayRef{weight.sizes()[0]}),
input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::convolution(
input_tensor, weight_tensor, bias_tensor, stride, padding, dilation,
transposed, output_padding, groups));
}
at::Tensor ATenMLIRType::div(const at::Tensor &self, at::Scalar other) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
return bridge::AtenFromMLIRTensor(MLIRTensor::div(input_tensor, other));
}
at::Tensor ATenMLIRType::div(const at::Tensor &self, const at::Tensor &other) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
return bridge::AtenFromMLIRTensor(
MLIRTensor::div(std::get<0>(tensors), std::get<1>(tensors)));
}
at::Tensor &ATenMLIRType::div_(at::Tensor &self, const at::Tensor &other) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::div_(std::get<0>(tensors), std::get<1>(tensors)));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
at::Tensor ATenMLIRType::expand(const at::Tensor &self, at::IntArrayRef size,
bool implicit) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
return bridge::AtenFromMLIRTensor(
MLIRTensor::expand(input_tensor, size, implicit));
}
at::Tensor ATenMLIRType::gather(const at::Tensor &self, int64_t dim,
const at::Tensor &index, bool sparse_grad) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto index_tensor =
bridge::GetOrCreateMLIRTensor(index, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(
MLIRTensor::gather(input_tensor, dim, index_tensor, sparse_grad));
}
at::Tensor ATenMLIRType::hardtanh(const at::Tensor &self, at::Scalar min_val,
at::Scalar max_val) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::hardtanh(input_tensor, min_val, max_val));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
at::Tensor &ATenMLIRType::hardtanh_(at::Tensor &self, at::Scalar min_val,
at::Scalar max_val) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::hardtanh_(input_tensor, min_val, max_val));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
at::Tensor ATenMLIRType::hardtanh_backward(const at::Tensor &grad_output,
const at::Tensor &self,
at::Scalar min_val,
at::Scalar max_val) {
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::hardtanh_backward(
grad_output_tensor, input_tensor, min_val, max_val));
}
at::Tensor ATenMLIRType::_log_softmax(const at::Tensor &self, int64_t dim,
bool half_to_float) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
return bridge::AtenFromMLIRTensor(
MLIRTensor::_log_softmax(input_tensor, dim, half_to_float));
}
at::Tensor
ATenMLIRType::_log_softmax_backward_data(const at::Tensor &grad_output,
const at::Tensor &output, int64_t dim,
const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto output_tensor =
bridge::GetOrCreateMLIRTensor(output, input_tensor.GetDevice());
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::_log_softmax_backward_data(
grad_output_tensor, output_tensor, dim, input_tensor));
}
std::tuple<at::Tensor, at::Tensor> ATenMLIRType::max_pool2d_with_indices(
const at::Tensor &self, at::IntArrayRef kernel_size, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto ret = MLIRTensor::max_pool2d_with_indices(
input_tensor, kernel_size, stride, padding, dilation, ceil_mode);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}
at::Tensor ATenMLIRType::max_pool2d_with_indices_backward(
const at::Tensor &grad_output, const at::Tensor &self,
at::IntArrayRef kernel_size, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
const at::Tensor &indices) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
auto indices_tensor =
bridge::GetOrCreateMLIRTensor(indices, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(
MLIRTensor::max_pool2d_with_indices_backward(
grad_output_tensor, input_tensor, kernel_size, stride, padding,
dilation, ceil_mode, indices_tensor));
}
at::Tensor ATenMLIRType::mean(const at::Tensor &self,
c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::mean(bridge::GetMLIRTensor(self), dtype));
}
at::Tensor ATenMLIRType::mean(const at::Tensor &self, at::IntArrayRef dim,
bool keepdim,
c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::mean(bridge::GetMLIRTensor(self), dim, keepdim, dtype));
}
at::Tensor ATenMLIRType::mm(const at::Tensor &input, const at::Tensor &mat2) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(input);
auto mat2_tensor =
bridge::GetOrCreateMLIRTensor(mat2, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::mm(input_tensor, mat2_tensor));
}
at::Tensor ATenMLIRType::mul(const at::Tensor &self, const at::Tensor &other) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
return bridge::AtenFromMLIRTensor(
MLIRTensor::mul(std::get<0>(tensors), std::get<1>(tensors)));
}
at::Tensor &ATenMLIRType::mul_(at::Tensor &self, const at::Tensor &other) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::mul_(std::get<0>(tensors), std::get<1>(tensors)));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
std::tuple<at::Tensor, at::Tensor, at::Tensor> ATenMLIRType::native_batch_norm(
const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
const at::Tensor &running_mean, const at::Tensor &running_var,
bool training, double momentum, double eps) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(input);
auto weight_tensor =
bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
auto bias_tensor =
bridge::GetOrCreateMLIRTensor(bias, input_tensor.GetDevice());
auto running_mean_tensor =
bridge::GetOrCreateMLIRTensor(running_mean, input_tensor.GetDevice());
auto running_var_tensor =
bridge::GetOrCreateMLIRTensor(running_var, input_tensor.GetDevice());
auto ret = MLIRTensor::native_batch_norm(
input_tensor, weight_tensor, bias_tensor, running_mean_tensor,
running_var_tensor, training, momentum, eps);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)),
bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}
std::tuple<at::Tensor, at::Tensor, at::Tensor>
ATenMLIRType::native_batch_norm_backward(
const at::Tensor &grad_out, const at::Tensor &input,
const at::Tensor &weight, const at::Tensor &running_mean,
const at::Tensor &running_var, const at::Tensor &save_mean,
const at::Tensor &save_invstd, bool train, double eps,
std::array<bool, 3> output_mask) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(input);
auto grad_out_tensor =
bridge::GetOrCreateMLIRTensor(grad_out, input_tensor.GetDevice());
auto weight_tensor =
bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
auto running_mean_tensor =
bridge::GetOrCreateMLIRTensor(running_mean, input_tensor.GetDevice());
auto running_var_tensor =
bridge::GetOrCreateMLIRTensor(running_var, input_tensor.GetDevice());
auto save_mean_tensor =
bridge::GetOrCreateMLIRTensor(save_mean, input_tensor.GetDevice());
auto save_invstd_tensor =
bridge::GetOrCreateMLIRTensor(save_invstd, input_tensor.GetDevice());
auto ret = MLIRTensor::native_batch_norm_backward(
grad_out_tensor, input_tensor, weight_tensor, running_mean_tensor,
running_var_tensor, save_mean_tensor, save_invstd_tensor, train, eps,
output_mask);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)),
bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}
at::Tensor ATenMLIRType::neg(const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
return bridge::AtenFromMLIRTensor(MLIRTensor::neg(input_tensor));
}
std::tuple<at::Tensor, at::Tensor> ATenMLIRType::nll_loss2d_forward(
const at::Tensor &self, const at::Tensor &target, const at::Tensor &weight,
int64_t reduction, int64_t ignore_index) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto target_tensor =
bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());
auto weight_tensor =
weight.defined()
? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
: bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
input_tensor.GetDevice());
auto ret = MLIRTensor::nll_loss2d_forward(
input_tensor, target_tensor, weight_tensor, reduction, ignore_index);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}
at::Tensor ATenMLIRType::nll_loss2d_backward(
const at::Tensor &grad_output, const at::Tensor &self,
const at::Tensor &target, const at::Tensor &weight, int64_t reduction,
int64_t ignore_index, const at::Tensor &total_weight) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
auto target_tensor =
bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());
auto weight_tensor =
weight.defined()
? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
: bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
input_tensor.GetDevice());
auto total_weight_tensor =
bridge::GetOrCreateMLIRTensor(total_weight, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::nll_loss2d_backward(
grad_output_tensor, input_tensor, target_tensor, weight_tensor, reduction,
ignore_index, total_weight_tensor));
}
std::tuple<at::Tensor, at::Tensor>
ATenMLIRType::nll_loss_forward(const at::Tensor &self, const at::Tensor &target,
const at::Tensor &weight, int64_t reduction,
int64_t ignore_index) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto target_tensor =
bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());
auto weight_tensor =
weight.defined()
? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
: bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
input_tensor.GetDevice());
auto ret = MLIRTensor::nll_loss_forward(
input_tensor, target_tensor, weight_tensor, reduction, ignore_index);
return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}
at::Tensor ATenMLIRType::nll_loss_backward(
const at::Tensor &grad_output, const at::Tensor &self,
const at::Tensor &target, const at::Tensor &weight, int64_t reduction,
int64_t ignore_index, const at::Tensor &total_weight) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
auto target_tensor =
bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());
auto weight_tensor =
weight.defined()
? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
: bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
input_tensor.GetDevice());
auto total_weight_tensor =
bridge::GetOrCreateMLIRTensor(total_weight, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::nll_loss_backward(
grad_output_tensor, input_tensor, target_tensor, weight_tensor, reduction,
ignore_index, total_weight_tensor));
}
at::Tensor ATenMLIRType::relu(const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::relu(bridge::GetMLIRTensor(self)));
}
at::Tensor &ATenMLIRType::relu_(at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto result = bridge::AtenFromMLIRTensor(MLIRTensor::relu_(input_tensor));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
int64_t ATenMLIRType::size(const at::Tensor &self, int64_t dim) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::GetMLIRTensor(self).sizes()[dim];
}
at::Tensor ATenMLIRType::squeeze(const at::Tensor &self, int64_t dim) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::squeeze(bridge::GetMLIRTensor(self), dim));
}
at::Tensor ATenMLIRType::sub(const at::Tensor &self, const at::Tensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
return bridge::AtenFromMLIRTensor(
MLIRTensor::sub(std::get<0>(tensors), std::get<1>(tensors), alpha));
}
at::Tensor &ATenMLIRType::sub_(at::Tensor &self, const at::Tensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
auto result = bridge::AtenFromMLIRTensor(
MLIRTensor::sub_(std::get<0>(tensors), std::get<1>(tensors), alpha));
MLIRTensorImpl *self_impl =
dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
self_impl->shallow_copy_from(result.getIntrusivePtr());
return self;
}
at::Tensor ATenMLIRType::sum(const at::Tensor &self, at::IntArrayRef dim,
bool keepdim,
c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::sum(bridge::GetMLIRTensor(self), dim, keepdim, dtype));
}
at::Tensor ATenMLIRType::t(const at::Tensor &self) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(MLIRTensor::t(bridge::GetMLIRTensor(self)));
}
at::Tensor ATenMLIRType::threshold_backward(const at::Tensor &grad_output,
const at::Tensor &self,
at::Scalar threshold) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto input_tensor = bridge::GetMLIRTensor(self);
auto grad_output_tensor =
bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
return bridge::AtenFromMLIRTensor(MLIRTensor::threshold_backward(
grad_output_tensor, input_tensor, threshold));
}
at::Tensor ATenMLIRType::to(const at::Tensor &self,
const at::TensorOptions &options,
bool /* non_blocking */, bool /* copy */) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
auto self_tensor = bridge::TryGetMLIRTensor(self);
if (!self_tensor) {
assert(options.has_device());
at::ScalarType dtype = options.has_dtype()
? c10::typeMetaToScalarType(options.dtype())
: self.scalar_type();
MLIRTensor xtensor =
MLIRTensor::Create(util::CopyTensor(self, dtype),
bridge::AtenDeviceToMLIRDevice(options.device()));
return bridge::AtenFromMLIRTensor(xtensor);
}
if (options.has_device() && options.device().type() != at::kXLA) {
return bridge::MLIRToAtenTensor(*self_tensor, options);
}
MLIROptions mlir_options(options, self_tensor->GetDevice(),
self_tensor->dtype());
return bridge::AtenFromMLIRTensor(MLIRTensor::to(
*self_tensor, mlir_options.device, mlir_options.scalar_type));
}
at::Tensor ATenMLIRType::to(const at::Tensor &self, c10::Device device,
at::ScalarType dtype, bool non_blocking,
bool copy) {
return to(self, self.options().device(device).dtype(dtype), non_blocking,
copy);
}
at::Tensor ATenMLIRType::to(const at::Tensor &self, at::ScalarType dtype,
bool non_blocking, bool copy) {
return to(self, self.options().dtype(dtype), non_blocking, copy);
}
at::Tensor ATenMLIRType::to(const at::Tensor &self, const at::Tensor &other,
bool non_blocking, bool copy) {
return to(self, other.options(), non_blocking, copy);
}
at::Tensor ATenMLIRType::_unsafe_view(const at::Tensor &self,
at::IntArrayRef size) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::view(bridge::GetMLIRTensor(self), size));
}
at::Tensor ATenMLIRType::unsqueeze(const at::Tensor &self, int64_t dim) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::unsqueeze(bridge::GetMLIRTensor(self), dim));
}
at::Tensor ATenMLIRType::view(const at::Tensor &self, at::IntArrayRef size) {
LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
return bridge::AtenFromMLIRTensor(
MLIRTensor::view(bridge::GetMLIRTensor(self), size));
}
} // namespace torch_mlir
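
The overrides above all share one shape, summarized in the sketch below with a hypothetical `someOp` (this is a distillation, not code from the file): unwrap the ATen tensors into `MLIRTensor`s on a common device, call the matching `MLIRTensor` factory so an IR node is recorded, and wrap the result back into an `at::Tensor`; the in-place `*_` variants additionally `shallow_copy_from` the result into `self`.

```cpp
// Pattern distilled from the overrides above; 'someOp' is a hypothetical op.
at::Tensor someOp(const at::Tensor &self, const at::Tensor &other) {
  auto lhs = torch_mlir::bridge::GetMLIRTensor(self);
  auto rhs = torch_mlir::bridge::GetOrCreateMLIRTensor(other, lhs.GetDevice());
  // MLIRTensor::someOp records an ir::Node rather than computing eagerly.
  return torch_mlir::bridge::AtenFromMLIRTensor(MLIRTensor::someOp(lhs, rhs));
}
```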

@@ -1,212 +0,0 @@
//===- aten_mlir_type.h -----------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// Structured similarly to code from git@github.com:pytorch/xla.git
#pragma once
#include <ATen/Tensor.h>
namespace torch_mlir {
// Base ATEN Type class where the MLIR specific overrides should be defined.
class ATenMLIRType {
public:
static void InitializeAtenBindings();
//////////////////////////////////////////////////////////////////////////////
// ATEN API overrides in alphabetical order.
// Note: The C++ signatures must match the ones listed within the following
// pytorch folder file:
// build/aten/src/ATen/RegistrationDeclarations.h
/////////////////////////////////////////////////////////////////////////////
// The static method definitions here have multiple uses. Each function
// signature here will override the default implementation provided by
// aten_mlir_type_defaults.h. Most of these overrides are used to construct
// a small internal IR that can be used for different purposes. Primarily,
// in this code, the IR will be converted to MLIR. As such, there is often
// a 1:1 correspondence between code here and operations in the ATen MLIR
// dialect.
// This file is parsed by gen_aten_dialect.py to generate
// aten_mlir_type_defaults.*, including the appropriate bindings in that
// file for all pytorch methods.
static at::Tensor _adaptive_avg_pool2d(const at::Tensor &self,
at::IntArrayRef output_size);
static at::Tensor _adaptive_avg_pool2d_backward(const at::Tensor &grad_output,
const at::Tensor &self);
static at::Tensor add(const at::Tensor &self, const at::Tensor &other,
at::Scalar alpha);
static at::Tensor &add_(at::Tensor &self, const at::Tensor &other,
at::Scalar alpha);
static at::Tensor addmm(const at::Tensor &self, const at::Tensor &mat1,
const at::Tensor &mat2, at::Scalar beta,
at::Scalar alpha);
static at::Tensor as_strided(const at::Tensor &self, at::IntArrayRef size,
at::IntArrayRef stride,
c10::optional<int64_t> storage_offset);
static at::Tensor clone(const at::Tensor &self);
static std::tuple<at::Tensor, at::Tensor, at::Tensor>
convolution_backward_overrideable(
const at::Tensor &grad_output, const at::Tensor &input,
const at::Tensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
int64_t groups, std::array<bool, 3> output_mask);
static at::Tensor convolution_overrideable(
const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
bool transposed, at::IntArrayRef output_padding, int64_t groups);
static at::Tensor &copy_(at::Tensor &self, const at::Tensor &src,
bool non_blocking);
static at::Tensor _copy_from(const at::Tensor &self, const at::Tensor &dst,
bool non_blocking);
static at::Tensor div(const at::Tensor &self, const at::Tensor &other);
static at::Tensor &div_(at::Tensor &self, const at::Tensor &other);
static at::Tensor div(const at::Tensor &self, at::Scalar other);
static at::Tensor expand(const at::Tensor &self, at::IntArrayRef size,
bool implicit);
static at::Tensor gather(const at::Tensor &self, int64_t dim,
const at::Tensor &index, bool sparse_grad);
static at::Tensor hardtanh(const at::Tensor &self, at::Scalar min_val,
at::Scalar max_val);
static at::Tensor &hardtanh_(at::Tensor &self, at::Scalar min_val,
at::Scalar max_val);
static at::Tensor hardtanh_backward(const at::Tensor &grad_output,
const at::Tensor &self,
at::Scalar min_val, at::Scalar max_val);
static at::Tensor _log_softmax(const at::Tensor &self, int64_t dim,
bool half_to_float);
static at::Tensor _log_softmax_backward_data(const at::Tensor &grad_output,
const at::Tensor &output,
int64_t dim,
const at::Tensor &self);
static std::tuple<at::Tensor, at::Tensor>
max_pool2d_with_indices(const at::Tensor &self, at::IntArrayRef kernel_size,
at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool ceil_mode);
static at::Tensor max_pool2d_with_indices_backward(
const at::Tensor &grad_output, const at::Tensor &self,
at::IntArrayRef kernel_size, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
const at::Tensor &indices);
static at::Tensor mean(const at::Tensor &self,
c10::optional<at::ScalarType> dtype);
static at::Tensor mean(const at::Tensor &self, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype);
static at::Tensor mm(const at::Tensor &self, const at::Tensor &mat2);
static at::Tensor mul(const at::Tensor &self, const at::Tensor &other);
static at::Tensor &mul_(at::Tensor &self, const at::Tensor &other);
static std::tuple<at::Tensor, at::Tensor, at::Tensor>
native_batch_norm(const at::Tensor &input, const at::Tensor &weight,
const at::Tensor &bias, const at::Tensor &running_mean,
const at::Tensor &running_var, bool training,
double momentum, double eps);
static std::tuple<at::Tensor, at::Tensor, at::Tensor>
native_batch_norm_backward(const at::Tensor &grad_out,
const at::Tensor &input, const at::Tensor &weight,
const at::Tensor &running_mean,
const at::Tensor &running_var,
const at::Tensor &save_mean,
const at::Tensor &save_invstd, bool train,
double eps, std::array<bool, 3> output_mask);
static at::Tensor neg(const at::Tensor &self);
static std::tuple<at::Tensor, at::Tensor>
nll_loss2d_forward(const at::Tensor &self, const at::Tensor &target,
const at::Tensor &weight, int64_t reduction,
int64_t ignore_index);
static at::Tensor nll_loss2d_backward(const at::Tensor &grad_output,
const at::Tensor &self,
const at::Tensor &target,
const at::Tensor &weight,
int64_t reduction, int64_t ignore_index,
const at::Tensor &total_weight);
static std::tuple<at::Tensor, at::Tensor>
nll_loss_forward(const at::Tensor &self, const at::Tensor &target,
const at::Tensor &weight, int64_t reduction,
int64_t ignore_index);
static at::Tensor nll_loss_backward(const at::Tensor &grad_output,
const at::Tensor &self,
const at::Tensor &target,
const at::Tensor &weight,
int64_t reduction, int64_t ignore_index,
const at::Tensor &total_weight);
static at::Tensor relu(const at::Tensor &self);
static at::Tensor &relu_(at::Tensor &self);
static int64_t size(const at::Tensor &self, int64_t dim);
static at::Tensor squeeze(const at::Tensor &self, int64_t dim);
static at::Tensor sub(const at::Tensor &self, const at::Tensor &other,
at::Scalar alpha);
static at::Tensor &sub_(at::Tensor &self, const at::Tensor &other,
at::Scalar alpha);
static at::Tensor sum(const at::Tensor &self, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype);
static at::Tensor t(const at::Tensor &self);
static at::Tensor threshold_backward(const at::Tensor &grad_output,
const at::Tensor &self,
at::Scalar threshold);
static at::Tensor to(const at::Tensor &self, const at::TensorOptions &options,
bool non_blocking, bool copy);
static at::Tensor to(const at::Tensor &self, c10::Device device,
at::ScalarType dtype, bool non_blocking, bool copy);
static at::Tensor to(const at::Tensor &self, at::ScalarType dtype,
bool non_blocking, bool copy);
static at::Tensor to(const at::Tensor &self, const at::Tensor &other,
bool non_blocking, bool copy);
static at::Tensor _unsafe_view(const at::Tensor &self, at::IntArrayRef size);
static at::Tensor unsqueeze(const at::Tensor &self, int64_t dim);
static at::Tensor view(const at::Tensor &self, at::IntArrayRef size);
};
} // namespace torch_mlir

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -1,67 +0,0 @@
//===- device.cpp -----------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// Structured similarly to code from git@github.com:pytorch/xla.git
#include "device.h"
namespace torch_mlir {
namespace {
std::string DeviceTypeToString(DeviceType hw_type) {
switch (hw_type) {
case DeviceType::CPU:
return "CPU";
case DeviceType::MLIR:
return "MLIR";
}
return "";
}
void ParseDevice(const std::string &device_spec, Device *device) {
if (device_spec.empty()) {
return ParseDevice(std::string("mlir:0"), device);
}
if (device_spec[0] == ':') {
return ParseDevice(std::string("mlir") + device_spec, device);
}
auto pos = device_spec.find(':');
auto devtype = device_spec.substr(0, pos);
// TODO error check
device->ordinal =
std::stoi(device_spec.substr(pos + 1, device_spec.size() - pos - 1));
if (devtype == "MLIR") {
device->hw_type = DeviceType::MLIR;
} else if (devtype == "CPU") {
device->hw_type = DeviceType::CPU;
} else {
// TODO, error
device->hw_type = DeviceType::MLIR;
}
}
} // namespace
Device::Device(const std::string &device_spec) {
ParseDevice(device_spec, this);
}
std::string Device::ToString() const {
return DeviceTypeToString(hw_type) + std::string(":") +
std::to_string(ordinal);
}
const Device *GetDefaultDevice() {
static const Device *default_device = new Device("");
return default_device;
}
} // namespace torch_mlir

@@ -1,59 +0,0 @@
//===- device.h -------------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// Structured similarly to code from git@github.com:pytorch/xla.git
#pragma once
#include <iostream>
#include <string>
namespace torch_mlir {
enum class DeviceType { CPU, MLIR };
/// Model a pytorch device, which determines the location of a buffer in
/// pytorch.
struct Device {
Device() = default;
explicit Device(const std::string &device_spec);
Device(DeviceType hw_type, int ordinal)
: hw_type(hw_type), ordinal(ordinal) {}
bool operator==(const Device &other) const { return compare(other) == 0; }
bool operator!=(const Device &other) const { return compare(other) != 0; }
bool operator<(const Device &rhs) const { return compare(rhs) < 0; }
int compare(const Device &rhs) const {
if (hw_type != rhs.hw_type) {
return hw_type < rhs.hw_type ? -1 : +1;
}
return ordinal < rhs.ordinal ? -1 : (ordinal > rhs.ordinal ? +1 : 0);
}
std::string ToString() const;
friend std::ostream &operator<<(std::ostream &os, const Device &device) {
os << device.ToString();
return os;
}
size_t hash() const { return std::hash<std::string>{}(ToString()); }
DeviceType hw_type = DeviceType::CPU;
int ordinal = 0;
};
const Device *GetDefaultDevice();
static inline const Device &GetDeviceOrDefault(const Device *device) {
return device != nullptr ? *device : *GetDefaultDevice();
}
} // namespace torch_mlir
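
Since `device.h` and `device.cpp` above are removed in full, a short usage sketch (mine, for orientation) covers essentially the whole surface being dropped:

```cpp
// Illustrative use of the removed Device helper, per device.h/device.cpp above.
#include "device.h"
#include <cassert>
#include <iostream>

int main() {
  torch_mlir::Device a("mlir:0");                         // parses to MLIR, ordinal 0
  torch_mlir::Device b(torch_mlir::DeviceType::MLIR, 1);
  assert(a != b && a < b);                                // hw_type first, then ordinal
  std::cout << a << "\n";                                 // prints "MLIR:0"
  std::cout << *torch_mlir::GetDefaultDevice() << "\n";   // default parses "mlir:0"
  return 0;
}
```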

File diff suppressed because it is too large

@@ -1,920 +0,0 @@
//===- ir.h -----------------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
// This file defines an intermediate IR generated from a pytorch model.
#include "llvm/Support/raw_ostream.h"
namespace mlir {
class OpBuilder;
class Value;
class Operation;
class MLIRContext;
} // namespace mlir
#include <map>
#include <vector>
#include <ATen/Tensor.h>
#include <ATen/core/interned_strings.h>
#include <c10/core/Scalar.h>
#include <c10/util/ArrayRef.h>
namespace torch_mlir {
namespace ir {
class Node;
void RegisterAtenIR();
using NodePtr = std::shared_ptr<Node>;
struct Value {
Value() = default;
Value(NodePtr node, size_t index = 0) : node(std::move(node)), index(index) {}
operator bool() const { return node != nullptr; }
bool operator==(const Value &rhs) const {
return node == rhs.node && index == rhs.index;
}
bool operator<(const Value &rhs) const {
if (node == rhs.node)
return index < rhs.index;
return node < rhs.node;
}
std::vector<int64_t> sizes() const;
std::vector<int64_t> strides() const;
NodePtr node;
size_t index = 0;
};
struct OpKind {
OpKind() = default;
explicit OpKind(c10::Symbol op) : op(std::move(op)) {}
bool operator==(const OpKind &rhs) const { return op == rhs.op; }
bool operator!=(const OpKind &rhs) const { return !operator==(rhs); }
bool operator<(const OpKind &rhs) const {
return c10::unique_t(op) < c10::unique_t(rhs.op);
}
// size_t hash() const;
std::string ToString() const { return op.toQualString(); }
static OpKind Get(const std::string &name) {
return OpKind(c10::Symbol::fromQualString(name));
}
c10::Symbol op;
};
inline std::ostream &operator<<(std::ostream &stream, const OpKind &op) {
stream << op.ToString();
return stream;
}
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &stream,
const OpKind &op) {
stream << op.ToString();
return stream;
}
using OpList = std::vector<Value>;
class Node {
public:
Node(OpKind op);
Node(OpKind op, OpList operands, std::vector<int64_t> sizes);
Node(OpKind op, OpList operands, at::IntArrayRef sizes);
const OpKind &op() const { return op_; }
virtual std::vector<int64_t> sizes() const { return sizes_[0]; }
virtual std::vector<int64_t> sizes(size_t i) const { return sizes_[0]; }
virtual std::vector<int64_t> strides() const { return strides(sizes()); }
virtual std::vector<int64_t> strides(size_t i) const {
return strides(sizes(i));
}
OpList &operands() { return operands_; }
Value operand(size_t i) const { return operands_.at(i); }
virtual mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable);
private:
std::vector<int64_t> strides(std::vector<int64_t> sz) const;
OpKind op_;
OpList operands_;
std::array<std::vector<int64_t>, 3> sizes_;
// std::array<std::vector<int64_t>, 3> strides_;
};
class ConstantNode : public Node {
public:
ConstantNode(at::Scalar scalar)
: Node(OpKind::Get("aten::constant")), scalar(scalar) {}
ConstantNode(at::IntArrayRef array)
: Node(OpKind::Get("aten::constant")), array(array.begin(), array.end()) {
}
ConstantNode(bool bool_)
: Node(OpKind::Get("aten::constant")), bool_(bool_) {}
ConstantNode(int int_) : Node(OpKind::Get("aten::constant")), int_(int_) {}
ConstantNode(int64_t int_)
: Node(OpKind::Get("aten::constant")), int_(int_) {}
ConstantNode(float float_)
: Node(OpKind::Get("aten::constant")), float_(float_) {}
ConstantNode(double double_)
: Node(OpKind::Get("aten::constant")), double_(double_) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override { return {1}; }
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
c10::optional<at::Scalar> scalar;
std::vector<int64_t> array;
c10::optional<bool> bool_;
c10::optional<int> int_;
c10::optional<float> float_;
c10::optional<double> double_;
};
class AdaptiveAvgPool2dNode : public Node {
public:
AdaptiveAvgPool2dNode(Value input, at::IntArrayRef kernel_size)
: Node(OpKind::Get("aten::_adaptive_avg_pool2d"),
OpList{input,
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size))},
std::vector<int64_t>{input.sizes()[0], input.sizes()[1],
kernel_size[0], kernel_size[1]}) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class AdaptiveAvgPool2dBackwardNode : public Node {
public:
AdaptiveAvgPool2dBackwardNode(Value grad_output, Value self)
: Node(OpKind::Get("aten::_adaptive_avg_pool2d_backward"),
OpList{grad_output, self}, self.sizes()) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class AddNode : public Node {
public:
AddNode(Value rhs, Value lhs, Value alpha)
: Node(OpKind::Get("aten::add"), OpList{rhs, lhs, alpha}, rhs.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class AddInPlaceNode : public Node {
public:
AddInPlaceNode(Value self, Value other, Value alpha)
: Node(OpKind::Get("aten::add_"), OpList{self, other, alpha},
self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class AddmmNode : public Node {
public:
AddmmNode(Value input, Value mat1, Value mat2, Value beta, Value alpha)
: Node(OpKind::Get("aten::addmm"), OpList{input, mat1, mat2, beta, alpha},
std::vector<int64_t>{mat1.sizes()[0], mat2.sizes()[1]}){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class AsStridedNode : public Node {
public:
AsStridedNode(Value input, at::IntArrayRef size, at::IntArrayRef stride,
c10::optional<int64_t> storage_offset)
: Node(OpKind::Get("aten::as_strided"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size)),
ir::Value(std::make_shared<ir::ConstantNode>(stride))},
input.sizes()),
size(size.begin(), size.end()), stride(stride.begin(), stride.end()),
storage_offset(storage_offset) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
std::vector<int64_t> strides() const override { return stride; }
std::vector<int64_t> strides(size_t i) const override { return strides(); }
std::vector<int64_t> size;
std::vector<int64_t> stride;
c10::optional<int64_t> storage_offset;
};
class BatchNormNode : public Node {
public:
BatchNormNode(Value input, Value weight, Value bias, Value running_mean,
Value running_var, bool training, double momentum, double eps)
: Node(OpKind::Get("aten::native_batch_norm"),
OpList{
input, weight, bias, running_mean, running_var,
ir::Value(std::make_shared<ir::ConstantNode>(training)),
ir::Value(std::make_shared<ir::ConstantNode>((float)momentum)),
ir::Value(std::make_shared<ir::ConstantNode>((float)eps))},
input.sizes()),
training(training), momentum(momentum), eps(eps) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
bool training;
double momentum;
double eps;
};
class BatchNormBackwardNode : public Node {
public:
BatchNormBackwardNode(Value grad_out, Value input, Value weight,
Value running_mean, Value running_var, Value save_mean,
Value save_invstd, bool train, double eps,
std::array<bool, 3> output_mask)
: Node(OpKind::Get("aten::native_batch_norm_backward"),
OpList{grad_out, input, weight, running_mean, running_var,
save_mean, save_invstd,
ir::Value(std::make_shared<ir::ConstantNode>(train)),
ir::Value(std::make_shared<ir::ConstantNode>((float)eps))},
input.sizes()),
train(train), eps(eps), output_mask(output_mask) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override {
assert(0 && "Cannot call sizes() for multiple outputs");
}
std::vector<int64_t> sizes(size_t i) const override;
private:
bool train;
double eps;
std::array<bool, 3> output_mask;
};
class Conv2dNode : public Node {
public:
Conv2dNode(Value input, Value weight, Value bias, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed,
at::IntArrayRef output_padding, int64_t groups)
: Node(OpKind::Get("aten::_convolution"),
OpList{
input, weight, bias,
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
input.sizes()),
stride(stride.begin(), stride.end()),
padding(padding.begin(), padding.end()),
dilation(dilation.begin(), dilation.end()), transposed(transposed),
output_padding(output_padding.begin(), output_padding.end()),
groups(groups), has_bias(true) {}
Conv2dNode(Value input, Value weight, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed,
at::IntArrayRef output_padding, int64_t groups)
: Node(OpKind::Get("aten::_convolution"),
OpList{
input, weight,
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
input.sizes()),
stride(stride.begin(), stride.end()),
padding(padding.begin(), padding.end()),
dilation(dilation.begin(), dilation.end()), transposed(transposed),
output_padding(output_padding.begin(), output_padding.end()),
groups(groups), has_bias(false) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
std::vector<int64_t> stride;
std::vector<int64_t> padding;
std::vector<int64_t> dilation;
bool transposed;
std::vector<int64_t> output_padding;
int64_t groups;
bool has_bias;
};
class Conv2dBackwardNode : public Node {
public:
Conv2dBackwardNode(Value grad_output, Value input, Value weight,
at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool transposed,
at::IntArrayRef output_padding, int64_t groups)
: Node(OpKind::Get("aten::_convolution_backward"),
OpList{
grad_output, input, weight,
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
input.sizes()),
stride(stride.begin(), stride.end()),
padding(padding.begin(), padding.end()),
dilation(dilation.begin(), dilation.end()), transposed(transposed),
output_padding(output_padding.begin(), output_padding.end()),
groups(groups) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override {
assert(0 && "Cannot call sizes() for multiple outputs");
}
std::vector<int64_t> sizes(size_t i) const override;
private:
std::vector<int64_t> stride;
std::vector<int64_t> padding;
std::vector<int64_t> dilation;
bool transposed;
std::vector<int64_t> output_padding;
int64_t groups;
};
class DivNode : public Node {
public:
DivNode(Value rhs, Value lhs)
: Node(OpKind::Get("aten::div"), OpList{rhs, lhs}, rhs.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class DivInPlaceNode : public Node {
public:
DivInPlaceNode(Value self, Value other)
: Node(OpKind::Get("aten::div_"), OpList{self, other}, self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class ExpandNode : public Node {
public:
ExpandNode(Value input, at::IntArrayRef size, bool implicit)
: Node(OpKind::Get("aten::expand"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size)),
ir::Value(std::make_shared<ir::ConstantNode>(implicit))},
input.sizes()),
output_size(size.begin(), size.end()), implicit(implicit) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override { return output_size; }
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
std::vector<int64_t> output_size;
bool implicit;
};
class GatherNode : public Node {
public:
GatherNode(Value input, int64_t dim, Value index, bool sparse_grad)
: Node(OpKind::Get("aten::gather"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
index,
ir::Value(std::make_shared<ir::ConstantNode>(sparse_grad))},
input.sizes()) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class HardtanhNode : public Node {
public:
HardtanhNode(Value self, Value min_val, Value max_val)
: Node(OpKind::Get("aten::hardtanh"), OpList{self, min_val, max_val},
self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class HardtanhInPlaceNode : public Node {
public:
HardtanhInPlaceNode(Value self, Value min_val, Value max_val)
: Node(OpKind::Get("aten::hardtanh_"), OpList{self, min_val, max_val},
self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class HardtanhBackwardNode : public Node {
public:
HardtanhBackwardNode(Value grad_output, Value self, Value min_val,
Value max_val)
: Node(OpKind::Get("aten::hardtanh_backward"),
OpList{grad_output, self, min_val, max_val}, self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class LogSoftmaxNode : public Node {
public:
LogSoftmaxNode(Value input, int64_t dim, bool half_to_float)
: Node(OpKind::Get("aten::_log_softmax"),
OpList{
input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
ir::Value(std::make_shared<ir::ConstantNode>(half_to_float))},
input.sizes()),
dim(dim), half_to_float(half_to_float) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t dim;
bool half_to_float;
};
class LogSoftmaxBackwardNode : public Node {
public:
LogSoftmaxBackwardNode(Value grad_output, Value output, int64_t dim,
Value input)
: Node(OpKind::Get("aten::_log_softmax_backward_data"),
OpList{grad_output, output,
ir::Value(std::make_shared<ir::ConstantNode>(dim)), input},
input.sizes()),
dim(dim) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t dim;
};
class MaxPool2dWithIndicesNode : public Node {
public:
MaxPool2dWithIndicesNode(Value input, at::IntArrayRef kernel_size,
at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool ceil_mode)
: Node(OpKind::Get("aten::max_pool2d_with_indices"),
OpList{input,
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size)),
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
ir::Value(std::make_shared<ir::ConstantNode>(ceil_mode))},
input.sizes()),
kernel_size(kernel_size.begin(), kernel_size.end()),
stride(stride.begin(), stride.end()),
padding(padding.begin(), padding.end()),
dilation(dilation.begin(), dilation.end()), ceil_mode(ceil_mode){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override {
assert(0 && "Cannot call sizes() for multiple outputs");
}
std::vector<int64_t> sizes(size_t i) const override;
private:
std::vector<int64_t> kernel_size;
std::vector<int64_t> stride;
std::vector<int64_t> padding;
std::vector<int64_t> dilation;
bool ceil_mode;
};
class MaxPool2dWithIndicesBackwardNode : public Node {
public:
MaxPool2dWithIndicesBackwardNode(Value grad_output, Value input,
at::IntArrayRef kernel_size,
at::IntArrayRef stride,
at::IntArrayRef padding,
at::IntArrayRef dilation, bool ceil_mode,
Value indices)
: Node(OpKind::Get("aten::max_pool2d_with_indices_backward"),
OpList{grad_output, input,
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size)),
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
ir::Value(std::make_shared<ir::ConstantNode>(ceil_mode)),
indices},
input.sizes()),
kernel_size(kernel_size.begin(), kernel_size.end()),
stride(stride.begin(), stride.end()),
padding(padding.begin(), padding.end()),
dilation(dilation.begin(), dilation.end()), ceil_mode(ceil_mode){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
std::vector<int64_t> kernel_size;
std::vector<int64_t> stride;
std::vector<int64_t> padding;
std::vector<int64_t> dilation;
bool ceil_mode;
};
class MeanNode : public Node {
public:
MeanNode(Value input, at::IntArrayRef dim, bool keepdim,
c10::optional<at::ScalarType> dtype)
: Node(OpKind::Get("aten::mean"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
ir::Value(std::make_shared<ir::ConstantNode>(keepdim))},
input.sizes()),
dim(dim.begin(), dim.end()), keepdim(keepdim), dtype(dtype) {}
MeanNode(Value input, c10::optional<at::ScalarType> dtype)
: Node(OpKind::Get("aten::mean"), OpList{input}, input.sizes()),
dtype(dtype) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
std::vector<int64_t> dim;
bool keepdim;
c10::optional<at::ScalarType> dtype;
};
class MMNode : public Node {
public:
MMNode(Value input, Value mat2)
: Node(OpKind::Get("aten::mm"), OpList{input, mat2},
std::vector<int64_t>{input.sizes()[0], mat2.sizes()[1]}){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class MulNode : public Node {
public:
MulNode(Value rhs, Value lhs)
: Node(OpKind::Get("aten::mul"), OpList{rhs, lhs}, rhs.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class MulInPlaceNode : public Node {
public:
MulInPlaceNode(Value self, Value other)
: Node(OpKind::Get("aten::mul_"), OpList{self, other}, self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class NegNode : public Node {
public:
NegNode(Value input)
: Node(OpKind::Get("aten::neg"), OpList{input}, input.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class NllLoss2dForwardNode : public Node {
public:
NllLoss2dForwardNode(Value self, Value target, Value weight,
int64_t reduction, int64_t ignore_index)
: Node(
OpKind::Get("aten::nll_loss2d_forward"),
OpList{self, target, weight,
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index))},
1 /*target.sizes()*/),
reduction(reduction), ignore_index(ignore_index) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t reduction;
int64_t ignore_index;
};
class NllLoss2dBackwardNode : public Node {
public:
NllLoss2dBackwardNode(Value grad_output, Value self, Value target,
Value weight, int64_t reduction, int64_t ignore_index,
Value total_weight)
: Node(OpKind::Get("aten::nll_loss2d_backward"),
OpList{grad_output, self, target, weight,
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index)),
total_weight},
self.sizes()),
reduction(reduction), ignore_index(ignore_index) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t reduction;
int64_t ignore_index;
};
class NllLossForwardNode : public Node {
public:
NllLossForwardNode(Value self, Value target, Value weight, int64_t reduction,
int64_t ignore_index)
: Node(
OpKind::Get("aten::nll_loss_forward"),
OpList{self, target, weight,
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index))},
1 /*target.sizes()*/),
reduction(reduction), ignore_index(ignore_index) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t reduction;
int64_t ignore_index;
};
class NllLossBackwardNode : public Node {
public:
NllLossBackwardNode(Value grad_output, Value self, Value target, Value weight,
int64_t reduction, int64_t ignore_index,
Value total_weight)
: Node(OpKind::Get("aten::nll_loss_backward"),
OpList{grad_output, self, target, weight,
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index)),
total_weight},
self.sizes()),
reduction(reduction), ignore_index(ignore_index) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t reduction;
int64_t ignore_index;
};
class SumNode : public Node {
public:
SumNode(Value input, at::IntArrayRef dim, bool keepdim,
c10::optional<at::ScalarType> dtype)
: Node(OpKind::Get("aten::sum"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
ir::Value(std::make_shared<ir::ConstantNode>(keepdim))},
input.sizes()),
dim(dim.begin(), dim.end()), keepdim(keepdim), dtype(dtype) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
std::vector<int64_t> dim;
bool keepdim;
c10::optional<at::ScalarType> dtype;
};
class ReLUNode : public Node {
public:
ReLUNode(Value input)
: Node(OpKind::Get("aten::relu"), OpList{input}, input.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class ReLUInPlaceNode : public Node {
public:
ReLUInPlaceNode(Value input)
: Node(OpKind::Get("aten::relu_"), OpList{input}, input.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class ThresholdBackwardNode : public Node {
public:
ThresholdBackwardNode(Value grad_output, Value input, Value threshold)
: Node(OpKind::Get("aten::threshold_backward"),
OpList{grad_output, input, threshold}, input.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class TransposeNode : public Node {
public:
TransposeNode(Value input)
: Node(OpKind::Get("aten::t"), OpList{input},
std::vector<int64_t>{input.sizes()[1], input.sizes()[0]}){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class SizeNode : public Node {
public:
SizeNode(Value input, int64_t dim)
: Node(OpKind::Get("aten::size"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
1),
dim(dim) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
private:
int64_t dim;
};
class SqueezeNode : public Node {
public:
SqueezeNode(Value input, int64_t dim)
: Node(OpKind::Get("aten::squeeze"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
input.sizes()),
dim(dim) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
int64_t dim;
};
class SubNode : public Node {
public:
SubNode(Value rhs, Value lhs, Value alpha)
: Node(OpKind::Get("aten::sub"), OpList{rhs, lhs, alpha}, rhs.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class SubInPlaceNode : public Node {
public:
SubInPlaceNode(Value self, Value other, Value alpha)
: Node(OpKind::Get("aten::sub_"), OpList{self, other, alpha},
self.sizes()){};
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
};
class UnsqueezeNode : public Node {
public:
UnsqueezeNode(Value input, int64_t dim)
: Node(OpKind::Get("aten::unsqueeze"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
input.sizes()),
dim(dim) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
int64_t dim;
};
class ViewNode : public Node {
public:
ViewNode(Value input, at::IntArrayRef size)
: Node(OpKind::Get("aten::view"),
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size))},
input.sizes()),
view_size(size.begin(), size.end()) {}
mlir::Operation *
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
std::map<const ir::Value, mlir::Value> &symbolTable) override;
std::vector<int64_t> sizes() const override;
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
private:
std::vector<int64_t> view_size;
};
class TorchDataNode : public Node {
public:
TorchDataNode(at::Tensor tensor)
: Node(ir::OpKind::Get("aten::torch_data"), {}, tensor.sizes()),
tensor_(std::move(tensor)) {}
at::Tensor tensor() { return tensor_; }
private:
at::Tensor tensor_;
};
} // namespace ir
} // namespace torch_mlir
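
To make the intent of these node classes concrete, here is a small hypothetical sketch (not from the deleted sources) of assembling a graph from leaf and interior nodes. It assumes ATen is available and that the out-of-line definitions in ir.cpp are linked in.

```cpp
// Hypothetical sketch: builds lhs + rhs with alpha = 1 using the node
// classes declared above.
#include <iostream>
#include <memory>
#include <ATen/ATen.h>
#include "ir.h"

using namespace torch_mlir::ir;

void ir_sketch() {
  // Leaf nodes: captured tensors and a constant scalar.
  at::Tensor t = at::ones({2, 3}, at::kFloat);
  Value lhs(std::make_shared<TorchDataNode>(t));
  Value rhs(std::make_shared<TorchDataNode>(t));
  Value alpha(std::make_shared<ConstantNode>(at::Scalar(1)));
  // Interior node: aten::add, whose output shape is taken from rhs.
  Value sum(std::make_shared<AddNode>(rhs, lhs, alpha));
  std::cout << sum.node->op() << "\n";  // prints "aten::add"
}
```

Each MLIRTensor op in tensor.cpp below follows exactly this pattern: wrap the operands as ir::Value, allocate a node, and return a new tensor backed by that value.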


@@ -1,333 +0,0 @@
//===- jit.cpp --------------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// This file drives the generation and lowering of MLIR, followed by JIT
// compiling the resulting LLVM dialect.
#include "npcomp/Dialect/ATen/ATenDialect.h"
#include "npcomp/Dialect/ATen/ATenPasses.h"
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include "mlir/ExecutionEngine/JitRunner.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/Verifier.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Target/LLVMIR.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include <dlfcn.h>
#include "ATen/ArrayRef.h"
namespace at {
template <typename T> using ArrayRef = c10::ArrayRef<T>;
}
#include "ATen/Tensor.h"
#include <ATen/CPUType.h>
#include "jit.h"
#include "mlir_gen.h"
#include "tensor.h"
#include "torch_util.h"
#define DEBUG_TYPE "torch_mlir"
using namespace mlir;
namespace torch_mlir {
namespace {
int LowerATenDialect(mlir::ModuleOp module) {
PassManager pm0(module.getContext());
pm0.addPass(mlir::createCSEPass());
// Lower to function calls.
pm0.addPass(mlir::NPCOMP::aten::createATenLoweringPass());
pm0.addPass(mlir::NPCOMP::aten::createReturnEliminationPass());
if (failed(pm0.run(module))) {
llvm::errs() << "aten to loops conversion failed ";
return 1;
}
PassManager pm1(module.getContext());
pm1.addPass(mlir::createLowerAffinePass());
pm1.addPass(mlir::createLowerToCFGPass());
pm1.addPass(mlir::createCSEPass());
if (failed(pm1.run(module))) {
llvm::errs() << "loops to std conversion failed ";
return 1;
}
return 0;
}
int LowerStdDialect(mlir::ModuleOp module) {
PassManager pm(module.getContext());
struct LowerToLLVMOptions options;
options.emitCWrappers = true;
LLVM_DEBUG(module.print(llvm::outs()));
pm.addPass(mlir::createLowerToLLVMPass(options));
pm.addPass(mlir::createCSEPass());
LLVM_DEBUG(module.print(llvm::outs()));
if (failed(pm.run(module))) {
llvm::errs() << "std to llvm conversion failed ";
return 1;
}
if (!module)
return 1;
return 0;
}
template <typename T, int N> struct llvm_tensor_t {
T *d;
T *aligned;
size_t offset;
size_t shape[N];
size_t stride[N];
};
template <typename T, int N> void *setupArg(at::Tensor &t) {
llvm_tensor_t<T, N> *arg = new llvm_tensor_t<T, N>;
llvm_tensor_t<T, N> **arg_storage = new llvm_tensor_t<T, N> *;
*arg_storage = arg;
arg->d = arg->aligned = (T *)t.data_ptr();
arg->offset = 0;
assert(t.dim() == N);
for (int j = 0; j < N; j++) {
arg->shape[j] = t.sizes()[j];
arg->stride[j] = t.stride(j);
}
return (void *)arg_storage;
}
at::Tensor LowerAndRun(mlir::ModuleOp module,
std::vector<at::Tensor> &arguments, const ir::Value &v,
mlir::MLIRContext &context) {
LowerATenDialect(module);
LowerStdDialect(module);
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel =
llvm::CodeGenOpt::Level::Aggressive;
std::string libpath;
if (const char *path = std::getenv("TEST_BUILD_PATH")) {
libpath = path;
}
std::vector<std::string> sharedLibs{libpath +
"/frontends/pytorch/lib/libaten_ops.so"};
llvm::errs() << "Loading " << sharedLibs[0] << "\n";
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
llvm::SmallVector<llvm::StringRef, 1> libs(sharedLibs.begin(),
sharedLibs.end());
auto expectedEngine = mlir::ExecutionEngine::create(
module, {}, jitCodeGenOptLevel, libs, false, false, false);
assert(expectedEngine && "no engine, cannot fly");
llvm::StringRef entryPoint("_mlir_ciface_graph");
auto engine = std::move(*expectedEngine);
auto expectedFPtr = engine->lookup(entryPoint);
assert(expectedFPtr && "entryPoint missing");
void (*fptr)(void **) = *expectedFPtr;
// this array holds pointers to the function arguments
void **args = (void **)malloc((arguments.size() + 1) * sizeof(void *));
// allocate and setup the function arguments
for (int i = 0, e = arguments.size(); i < e; i++) {
at::Tensor &t = arguments[i];
auto dtype = t.dtype();
int dim = t.dim();
if (dim == 4) {
if (dtype == at::kFloat)
args[i] = setupArg<float, 4>(t);
else if (dtype == at::kLong)
args[i] = setupArg<uint64_t, 4>(t);
else
assert(0);
} else if (dim == 3) {
if (dtype == at::kFloat)
args[i] = setupArg<float, 3>(t);
else if (dtype == at::kLong)
args[i] = setupArg<uint64_t, 3>(t);
else
assert(0);
} else if (dim == 2) {
if (dtype == at::kFloat)
args[i] = setupArg<float, 2>(t);
else if (dtype == at::kLong)
args[i] = setupArg<uint64_t, 2>(t);
else
assert(0);
} else if (dim == 1) {
if (dtype == at::kFloat)
args[i] = setupArg<float, 1>(t);
else if (dtype == at::kLong)
args[i] = setupArg<uint64_t, 1>(t);
else
assert(0);
} else {
assert(0 && "unhandled dim");
}
}
// allocate the result tensors
// TODO: num results > 1
at::Tensor result = util::Zeros(v.sizes(), at::kFloat);
if (result.dim() == 4) {
args[arguments.size()] = setupArg<float, 4>(result);
} else if (result.dim() == 3) {
args[arguments.size()] = setupArg<float, 3>(result);
} else if (result.dim() == 2) {
args[arguments.size()] = setupArg<float, 2>(result);
} else if (result.dim() == 1) {
args[arguments.size()] = setupArg<float, 1>(result);
} else {
assert(0 && "unhandled dim");
}
// call the JITed function
fptr(args);
// free pointers to the results
// TODO: num results > 1
if (result.dim() == 4) {
auto arg_storage =
static_cast<llvm_tensor_t<float, 4> **>(args[arguments.size()]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (result.dim() == 3) {
auto arg_storage =
static_cast<llvm_tensor_t<float, 3> **>(args[arguments.size()]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (result.dim() == 2) {
auto arg_storage =
static_cast<llvm_tensor_t<float, 2> **>(args[arguments.size()]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (result.dim() == 1) {
auto arg_storage =
static_cast<llvm_tensor_t<float, 1> **>(args[arguments.size()]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else {
assert(0 && "unhandled dim");
}
// free pointers to the arguments
for (int i = 0, e = arguments.size(); i < e; i++) {
at::Tensor &t = arguments[i];
int dim = t.dim();
if (dim == 4) {
auto arg_storage = static_cast<llvm_tensor_t<float, 4> **>(args[i]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (dim == 3) {
auto arg_storage = static_cast<llvm_tensor_t<float, 3> **>(args[i]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (dim == 2) {
auto arg_storage = static_cast<llvm_tensor_t<float, 2> **>(args[i]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else if (dim == 1) {
auto arg_storage = static_cast<llvm_tensor_t<float, 1> **>(args[i]);
auto arg = *arg_storage;
delete arg;
delete arg_storage;
} else {
assert(0 && "unhandled dim");
}
}
// free the array of void* ptrs
free(args);
return result;
}
at::Tensor JitAndRun(const ir::Value &v, mlir::MLIRContext &context) {
// generate the MLIR
std::vector<ir::Value> vs{v};
auto mlir_gen = MLIRGen(context).genModule(vs);
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
std::vector<at::Tensor> arguments = std::move(std::get<1>(mlir_gen));
return LowerAndRun(module.get(), arguments, v, context);
}
at::Tensor JitAndRun(const ir::Value &v) {
mlir::MLIRContext context;
return JitAndRun(v, context);
}
at::Tensor Interpret(const ir::Value &v) { assert(0 && "unsupported"); }
} // anonymous namespace
// FIXME: Why is this code here and not in tensor.cpp?
std::string MLIRTensor::GetMLIR() const {
// generate the MLIR
mlir::MLIRContext context;
ir::Value ir_value = CurrentIrValue();
if (!ir_value)
return "<tensor>";
std::vector<ir::Value> vs{ir_value};
auto mlir_gen = MLIRGen(context).genModule(vs);
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
std::string aten;
llvm::raw_string_ostream ss(aten);
module->print(ss);
return ss.str();
}
at::Tensor MLIRTensor::CompileAndRun() const {
return JitAndRun(CurrentIrValue());
}
} // namespace torch_mlir
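
The descriptor packing in LowerAndRun mirrors MLIR's emitCWrappers calling convention: every tensor becomes a rank-N memref-style descriptor, and the `_mlir_ciface_graph` entry point receives an array of pointers to pointers to those descriptors. Below is a standalone illustration of that convention with a stand-in for the JIT-compiled symbol; it is an exposition aid, not part of the deleted sources.

```cpp
// Standalone sketch of the argument packing used by LowerAndRun above.
#include <cstddef>
#include <cstdio>

template <typename T, int N> struct llvm_tensor_t {
  T *d;
  T *aligned;
  size_t offset;
  size_t shape[N];
  size_t stride[N];
};

// Stand-in for the JIT-compiled "_mlir_ciface_graph" symbol: out = in + 1.
static void fake_graph(void **args) {
  auto *in = *static_cast<llvm_tensor_t<float, 2> **>(args[0]);
  auto *out = *static_cast<llvm_tensor_t<float, 2> **>(args[1]);
  for (size_t i = 0; i < in->shape[0] * in->shape[1]; ++i)
    out->aligned[i] = in->aligned[i] + 1.0f;
}

int main() {
  float in_buf[6] = {0, 1, 2, 3, 4, 5}, out_buf[6] = {};
  llvm_tensor_t<float, 2> in{in_buf, in_buf, 0, {2, 3}, {3, 1}};
  llvm_tensor_t<float, 2> out{out_buf, out_buf, 0, {2, 3}, {3, 1}};
  llvm_tensor_t<float, 2> *in_p = &in, *out_p = &out;
  void *args[2] = {&in_p, &out_p};  // pointers to descriptor pointers
  fake_graph(args);
  std::printf("%f\n", out_buf[5]);  // prints 6.000000
  return 0;
}
```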


@@ -1,16 +0,0 @@
//===- jit.h ----------------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
namespace torch_mlir {
// namespace jit {
// at::Tensor CompileAndRun(const MLIRTensor &tensor);
// at::Tensor JitAndRun(const ir::Value &v);
//}
} // namespace torch_mlir


@@ -1,214 +0,0 @@
//===- mlir_gen.cpp ---------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/Types.h"
#include "mlir/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "npcomp/Dialect/ATen/ATenDialect.h"
#include "ATen/ArrayRef.h"
namespace at {
template <typename T> using ArrayRef = c10::ArrayRef<T>;
}
#include "ATen/Tensor.h"
#include "ir.h"
#include "mlir_gen.h"
#include <set>
#include <vector>
#define DEBUG_TYPE "torch_mlir"
namespace torch_mlir {
MLIRGen::MLIRGen(mlir::MLIRContext &context) : context(context) {
context.getOrLoadDialect<mlir::NPCOMP::aten::ATenDialect>();
context.getOrLoadDialect<mlir::StandardOpsDialect>();
}
std::tuple<mlir::OwningModuleRef, std::vector<at::Tensor>>
MLIRGen::genModule(std::vector<ir::Value> &v) {
// the module
module = mlir::ModuleOp::create(mlir::UnknownLoc::get(&context));
auto fn = genFunction(v);
if (fn) {
module->push_back(fn);
if (failed(mlir::verify(*module))) {
emitError(mlir::UnknownLoc::get(&context), "module verification error");
}
}
return std::make_tuple(std::move(module), arguments);
}
mlir::Value MLIRGen::genValue(const ir::Value &v) {
if (symbolTable.count(v))
return symbolTable[v];
LLVM_DEBUG(llvm::dbgs() << "genValue node: " << v.node->op() << "\n");
ir::NodePtr node = v.node;
auto loc = mlir::UnknownLoc::get(&context);
for (auto &operand : node->operands())
genValue(operand);
mlir::Value mlirValue = nullptr;
if (opTable.count(v.node)) {
mlirValue = opTable[v.node]->getResult(v.index);
} else {
mlir::Operation *mlirOp = node->genMLIR(builder, context, symbolTable);
opTable.insert({v.node, mlirOp});
assert(mlirOp && "failed to generate mlir op");
mlirValue = mlirOp->getResult(v.index);
}
declareSymbol(v, mlirValue);
return mlirValue;
}
// generate function parameters for the IR rooted at v
void MLIRGen::genParameters(const ir::Value &v, std::set<ir::Value> &visited) {
ir::NodePtr node = v.node;
if (visited.count(v))
return;
visited.insert(v);
for (const ir::Value &operand : node->operands()) {
// if the operand is a leaf
if (operand.node->op() == ir::OpKind::Get("aten::torch_data")) {
parameters.push_back(operand);
} else {
genParameters(operand, visited);
}
}
}
mlir::FuncOp MLIRGen::genFunction(std::vector<ir::Value> &vs) {
auto loc = mlir::UnknownLoc::get(&context);
auto gen_tensor_ty = [&](const ir::Value &v) {
auto shape = v.sizes();
auto tdn = dynamic_cast<ir::TorchDataNode *>(v.node.get());
mlir::Type elemTy;
if (tdn) {
auto dtype = tdn->tensor().dtype();
if (dtype == at::kFloat)
elemTy = mlir::FloatType::getF32(&context);
else if (dtype == at::kDouble)
elemTy = mlir::FloatType::getF64(&context);
else if (dtype == at::kLong)
elemTy = mlir::IntegerType::get(64, &context);
else if (dtype == at::kInt)
elemTy = mlir::IntegerType::get(32, &context);
else if (dtype == at::kShort)
elemTy = mlir::IntegerType::get(16, &context);
else if (dtype == at::kChar || dtype == at::kByte)
elemTy = mlir::IntegerType::get(8, &context);
else {
std::cout << tdn->tensor().dtype() << "\n";
assert(0 && "bad type");
}
} else {
elemTy = mlir::FloatType::getF32(&context);
}
return mlir::RankedTensorType::get(shape, elemTy);
};
std::set<ir::Value> visited;
for (auto &v : vs)
genParameters(v, visited);
std::map<ir::Value, ir::Value> parameter_map;
std::vector<ir::Value> unique_parameters;
for (const ir::Value &p : parameters) {
bool found = false;
for (const ir::Value &q : unique_parameters) {
if (p.node->op() == ir::OpKind::Get("aten::torch_data") &&
q.node->op() == ir::OpKind::Get("aten::torch_data")) {
auto &ptd = *dynamic_cast<ir::TorchDataNode *>(p.node.get());
auto &qtd = *dynamic_cast<ir::TorchDataNode *>(q.node.get());
if (ptd.tensor().is_same(qtd.tensor())) {
found = true;
parameter_map.insert({p, q});
break;
}
}
}
if (!found) {
unique_parameters.push_back(p);
}
}
// collect the argument types and tensors
std::vector<mlir::Type> arg_types;
for (const ir::Value &p : unique_parameters) {
// tensor type for the function signature
arg_types.push_back(gen_tensor_ty(p));
// tensor itself for actually calling the graph
auto tdn = dynamic_cast<ir::TorchDataNode *>(p.node.get());
arguments.push_back(tdn->tensor());
}
// construct return type
std::vector<mlir::Type> ret_types;
for (auto &v : vs)
ret_types.push_back(gen_tensor_ty(v));
// create the function type and the function itself
auto func_type = mlir::FunctionType::get(arg_types, ret_types, &context);
auto function =
mlir::FuncOp::create(loc, "graph", func_type, /* attrs = */ {});
// entry
auto &entryBlock = *function.addEntryBlock();
// Declare all the function arguments in the symbol table.
for (const auto &i :
llvm::zip(unique_parameters, entryBlock.getArguments())) {
declareSymbol(std::get<0>(i), std::get<1>(i));
}
// Declare all the duplicates from the original
// parameter list in the symbol table
for (auto &k_v : parameter_map) {
assert(symbolTable.count(k_v.second));
declareSymbol(k_v.first, symbolTable[k_v.second]);
}
builder = std::make_unique<mlir::OpBuilder>(function.getBody());
std::vector<mlir::Value> rets;
for (auto &v : vs)
rets.push_back(genValue(v));
builder->create<mlir::ReturnOp>(loc, rets);
return function;
}
bool MLIRGen::declareSymbol(const ir::Value &irValue, mlir::Value mlirValue) {
if (symbolTable.count(irValue)) {
return false;
}
symbolTable.insert({irValue, mlirValue});
return true;
}
} // namespace torch_mlir


@@ -1,45 +0,0 @@
//===- mlir_gen.h -----------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
#include "mlir/IR/MLIRContext.h"
#include "ir.h"
namespace torch_mlir {
/// This class generates MLIR from a pytorch graph
class MLIRGen {
public:
MLIRGen(mlir::MLIRContext &context);
// Generate an MLIR model that computes the given outputs.
std::tuple<mlir::OwningModuleRef, std::vector<at::Tensor>>
genModule(std::vector<ir::Value> &v);
private:
mlir::Value genValue(const ir::Value &v);
void genParameters(const ir::Value &v, std::set<ir::Value> &visited);
mlir::FuncOp genFunction(std::vector<ir::Value> &v);
bool declareSymbol(const ir::Value &irValue, mlir::Value mlirValue);
private:
mlir::MLIRContext &context;
mlir::OwningModuleRef module;
std::unique_ptr<mlir::OpBuilder> builder;
std::map<const ir::Value, mlir::Value> symbolTable;
std::map<const ir::NodePtr, mlir::Operation *> opTable;
std::vector<ir::Value> parameters;
std::vector<at::Tensor> arguments;
};
} // namespace torch_mlir
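
A hypothetical usage sketch of this class, mirroring what JitAndRun and MLIRTensor::GetMLIR do in jit.cpp above; it assumes the MLIR headers of that era (OwningModuleRef from mlir/IR/Module.h) and is not itself part of the deleted sources.

```cpp
// Hypothetical sketch: turn an IR root value into printed MLIR text.
#include <string>
#include <tuple>
#include <vector>
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "ir.h"
#include "mlir_gen.h"

std::string emit_mlir_for(const torch_mlir::ir::Value &root) {
  mlir::MLIRContext context;
  std::vector<torch_mlir::ir::Value> roots{root};
  // genModule returns the module plus the leaf at::Tensor arguments that
  // would be passed to the generated "graph" function.
  auto result = torch_mlir::MLIRGen(context).genModule(roots);
  mlir::OwningModuleRef module = std::move(std::get<0>(result));
  std::string out;
  llvm::raw_string_ostream os(out);
  module->print(os);
  return os.str();
}
```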


@@ -1,137 +0,0 @@
//===- init_python_bindings.cpp ---------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// This file implements Python bindings to the MLIR/NPCOMP ATen dialect.
// Roughly speaking, it enables something like this:
//
// dev = torch_mlir.mlir_device()
// t0 = torch.randn((4,4), device=dev)
// t1 = torch.randn((4,4), device=dev)
// t2 = t0 + t1
// t2_mlir = torch_mlir.get_mlir( t2 )
// t2_cpu = t2.to('cpu')
//
// In this case t2_cpu contains the result of the computation, and t2_mlir
// contains the mlir description of the computation.
#include "../pybind.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/Verifier.h"
#include "mlir/Parser.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"
#include "npcomp/Dialect/ATen/ATenDialect.h"
#include "npcomp/Dialect/ATen/ATenOpReport.h"
#include "npcomp/Dialect/ATen/ATenPasses.h"
#include "npcomp/Dialect/ATen/LivenessReport.h"
namespace py = pybind11;
// Then ATen headers with workarounds
#include "ATen/ArrayRef.h"
namespace at {
template <typename T> using ArrayRef = c10::ArrayRef<T>;
}
#include "ATen/SmallVector.h"
namespace at {
template <typename T, int S> using SmallVector = c10::SmallVector<T, S>;
}
#include <ATen/Tensor.h>
// other headers
#include "aten_mlir_bridge.h"
#include "aten_mlir_type.h"
#include "init_python_bindings.h"
#include "mlir_gen.h"
#include <string>
using namespace mlir;
namespace llvm {
extern bool DebugFlag;
}
namespace torch_mlir {
namespace {
mlir::OwningModuleRef LoadModule(mlir::MLIRContext &context, std::string mlir) {
mlir::OwningModuleRef module;
std::unique_ptr<llvm::MemoryBuffer> membuf =
llvm::MemoryBuffer::getMemBuffer(mlir);
llvm::SourceMgr sourceMgr;
sourceMgr.AddNewSourceBuffer(std::move(membuf), llvm::SMLoc());
module = mlir::parseSourceFile(sourceMgr, &context);
if (!module) {
llvm::errs() << "Error can't parse mlir module\n";
return nullptr;
}
if (failed(mlir::verify(*module))) {
llvm::errs() << "Error verifying MLIR module\n";
return nullptr;
}
if (!module)
return nullptr;
return module;
}
void InitModuleBindings(py::module &m) {
m.def("_initialize_aten_bindings",
[]() { ATenMLIRType::InitializeAtenBindings(); });
m.def("_set_default_device", []() {});
m.def("_get_mlir", [](std::vector<at::Tensor> &ts) -> std::string {
if (ts.size() == 0)
return std::string();
mlir::MLIRContext context;
// gather IR for all the tensors
std::vector<ir::Value> recorded_ir;
for (auto &t : ts)
if (c10::optional<MLIRTensor> at = bridge::TryGetMLIRTensor(t))
recorded_ir.push_back(at->GetIrValue());
// generate MLIR from IR
auto mlir_gen = MLIRGen(context).genModule(recorded_ir);
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
mlir::PassManager pm(module->getContext());
pm.addPass(mlir::createCSEPass());
pm.addPass(mlir::NPCOMP::aten::createATenLayerNamePass());
if (failed(pm.run(*module))) {
llvm::errs() << "ATenLayerNamePass failed";
return "<error>";
}
// dump MLIR to string and return
std::string s;
llvm::raw_string_ostream ss(s);
module->print(ss);
return ss.str();
});
}
} // namespace
void InitTypeDispatchBindings(py::module &m) { InitModuleBindings(m); }
} // namespace torch_mlir
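
For completeness, a hypothetical sketch of how InitTypeDispatchBindings is hooked into a Python extension module; the real module entry point lived elsewhere in the deleted tree, and the module name here is invented.

```cpp
// Hypothetical sketch only: module name and doc string are placeholders,
// and init_python_bindings.h is assumed to declare InitTypeDispatchBindings.
#include <pybind11/pybind11.h>
#include "init_python_bindings.h"

namespace py = pybind11;

PYBIND11_MODULE(_torch_mlir_example, m) {
  m.doc() = "Deprecated ATen pseudo-device type dispatch bindings (sketch)";
  torch_mlir::InitTypeDispatchBindings(m);
}
```

From Python this surfaced the _initialize_aten_bindings, _set_default_device, and _get_mlir entry points registered in InitModuleBindings above.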


@@ -1,613 +0,0 @@
//===- tensor.cpp -----------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Debug.h"
#include "ATen/ArrayRef.h"
namespace at {
template <typename T> using ArrayRef = c10::ArrayRef<T>;
}
#include "ATen/Tensor.h"
#include "jit.h"
#include "tensor.h"
#include <atomic>
#define DEBUG_TYPE "torch_mlir"
namespace torch_mlir {
MLIRTensor MLIRTensor::Create(const at::Tensor &tensor, const Device &device) {
assert(tensor.device().type() == at::kCPU);
MLIRTensor device_tensor(tensor, device);
return device_tensor;
}
MLIRTensor
MLIRTensor::Create(ir::Value ir_value, const Device &device,
c10::optional<at::ScalarType> logical_element_type) {
MLIRTensor device_tensor(std::move(ir_value), device, logical_element_type);
return device_tensor;
}
MLIRTensor::MLIRTensor(const at::Tensor &tensor, const Device &device)
: data_(std::make_shared<Data>(tensor, device)) {}
MLIRTensor::MLIRTensor(ir::Value ir_value, const Device &device,
c10::optional<at::ScalarType> logical_element_type)
: data_(std::make_shared<Data>(std::move(ir_value), device,
logical_element_type)) {}
MLIRTensor::Data *MLIRTensor::data() const {
assert(data_ != nullptr && "Trying to access null data");
return data_.get();
}
at::ScalarType MLIRTensor::dtype() const {
return data()->logical_element_type ? *data()->logical_element_type
: at::ScalarType::Float;
}
const Device &MLIRTensor::GetDevice() const { return data()->device; }
uint64_t MLIRTensor::GetNextTensorId() {
static std::atomic<uint64_t> *id_generator = new std::atomic<uint64_t>(1);
return id_generator->fetch_add(1);
}
void MLIRTensor::SetTensorData(at::Tensor tensor_data) {
data()->tensor_data = std::move(tensor_data);
}
ir::Value MLIRTensor::GetIrValue() const {
ir::Value ir_value = CurrentIrValue();
if (ir_value) {
return ir_value;
}
c10::optional<at::Tensor> tensor_data = CurrentTensorData();
if (tensor_data) {
at::Tensor tensor = *tensor_data;
if (!tensor.dim()) {
auto dtype = tensor.dtype();
if (dtype == at::kFloat) {
auto d = tensor.data_ptr<float>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
} else if (dtype == at::kDouble) {
auto d = tensor.data_ptr<double>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
} else if (dtype == at::kLong) {
auto d = tensor.data_ptr<int64_t>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
} else if (dtype == at::kInt) {
auto d = tensor.data_ptr<int32_t>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
} else if (dtype == at::kShort) {
auto d = tensor.data_ptr<int16_t>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
} else if (dtype == at::kChar || dtype == at::kByte) {
auto d = tensor.data_ptr<int8_t>();
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
}
// fall through to TorchDataNode below
}
return ir::Value(std::make_shared<ir::TorchDataNode>(*tensor_data));
}
assert(0 && "Could not create ir value from leaf tensor");
return ir::Value();
}
ir::Value MLIRTensor::CurrentIrValue() const { return data()->ir_value; }
void MLIRTensor::SetIrValue(ir::Value ir_value) {
data()->generation += 1;
data()->ir_value = std::move(ir_value);
}
c10::optional<at::Tensor> MLIRTensor::CurrentTensorData() const {
return data()->tensor_data;
}
void MLIRTensor::SetTensor(at::Tensor tensor) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
SetTensorData(tensor);
data()->generation += 1;
}
at::Tensor MLIRTensor::ToTensor() const {
c10::optional<at::Tensor> tensor_data = CurrentTensorData();
if (!tensor_data)
tensor_data = CompileAndRun();
assert(tensor_data);
return *tensor_data;
}
void MLIRTensor::ShallowCopyTo(MLIRTensor *dest) const {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
auto data = CurrentTensorData();
if (data)
dest->SetTensor(*data);
else
dest->SetIrValue(CurrentIrValue());
dest->SetScalarType(dtype());
assert(GetDevice() == dest->GetDevice());
}
void MLIRTensor::SetScalarType(
c10::optional<at::ScalarType> logical_element_type) {
data()->logical_element_type = logical_element_type;
}
std::vector<int64_t> MLIRTensor::sizes() const {
if (data()->ir_value) {
return data()->ir_value.sizes();
}
assert(data()->tensor_data && "tensor has no shape information");
if (data()->tensor_data) {
auto s = data()->tensor_data->sizes();
return {s.begin(), s.end()};
}
return {};
}
std::vector<int64_t> MLIRTensor::strides() const {
if (data()->ir_value) {
return data()->ir_value.strides();
}
assert(data()->tensor_data && "tensor has no shape information");
if (data()->tensor_data) {
auto s = data()->tensor_data->strides();
return {s.begin(), s.end()};
}
return {};
}
MLIRTensor MLIRTensor::CreateFrom(ir::Value ir_value) const {
return Create(std::move(ir_value), GetDevice(), dtype());
}
////////////////////////////////////////////
// aten tensor methods
////////////////////////////////////////////
MLIRTensor MLIRTensor::_adaptive_avg_pool2d(const MLIRTensor &self,
at::IntArrayRef output_size) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::AdaptiveAvgPool2dNode>(
self.GetIrValue(), output_size);
return self.CreateFrom(node);
}
MLIRTensor
MLIRTensor::_adaptive_avg_pool2d_backward(const MLIRTensor &grad_output,
const MLIRTensor &self) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::AdaptiveAvgPool2dBackwardNode>(
grad_output.GetIrValue(), self.GetIrValue());
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::add(const MLIRTensor &self, const MLIRTensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddNode>(
self.GetIrValue(), other.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::add_(MLIRTensor &self, const MLIRTensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddInPlaceNode>(
self.GetIrValue(), other.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::addmm(const MLIRTensor &input, const MLIRTensor &mat1,
const MLIRTensor &mat2, at::Scalar beta,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddmmNode>(
input.GetIrValue(), mat1.GetIrValue(), mat2.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(beta)),
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::as_strided(const MLIRTensor &input, at::IntArrayRef size,
at::IntArrayRef stride,
c10::optional<int64_t> storage_offset) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::AsStridedNode>(
input.GetIrValue(), size, stride, storage_offset);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::clone(const MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
return MLIRTensor::Create(std::move(input.ToTensor()), input.GetDevice());
}
MLIRTensor MLIRTensor::convolution(
const MLIRTensor &input, const MLIRTensor &weight, const MLIRTensor &bias,
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
bool transposed, at::IntArrayRef output_padding, int64_t groups) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::Conv2dNode>(
input.GetIrValue(), weight.GetIrValue(), bias.GetIrValue(), stride,
padding, dilation, transposed, output_padding, groups);
return input.CreateFrom(node);
}
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor> MLIRTensor::convolution_backward(
const MLIRTensor &grad_output, const MLIRTensor &input,
const MLIRTensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
int64_t groups, std::array<bool, 3> output_mask) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::Conv2dBackwardNode>(
grad_output.GetIrValue(), input.GetIrValue(), weight.GetIrValue(), stride,
padding, dilation, transposed, output_padding, groups /*, output_mask*/);
auto result0 = input.CreateFrom(ir::Value(node, 0));
auto result1 = input.CreateFrom(ir::Value(node, 1));
auto result2 = input.CreateFrom(ir::Value(node, 2));
return std::make_tuple(result0, result1, result2);
}
void MLIRTensor::copy_(MLIRTensor &self, MLIRTensor &src) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
src.ShallowCopyTo(&self);
}
MLIRTensor MLIRTensor::div(const MLIRTensor &self, at::Scalar other) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::DivNode>(
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(other)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::div(const MLIRTensor &self, const MLIRTensor &other) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::DivNode>(self.GetIrValue(), other.GetIrValue());
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::div_(MLIRTensor &self, const MLIRTensor &other) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::DivInPlaceNode>(
self.GetIrValue(), other.GetIrValue());
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::expand(const MLIRTensor &self, at::IntArrayRef size,
bool implicit) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::ExpandNode>(self.GetIrValue(), size, implicit);
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::gather(const MLIRTensor &self, int64_t dim,
const MLIRTensor &index, bool sparse_grad) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::GatherNode>(
self.GetIrValue(), dim, index.GetIrValue(), sparse_grad);
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::hardtanh(const MLIRTensor &self, at::Scalar min_val,
at::Scalar max_val) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhNode>(
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::hardtanh_(MLIRTensor &self, at::Scalar min_val,
at::Scalar max_val) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhInPlaceNode>(
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::hardtanh_backward(const MLIRTensor &grad_output,
const MLIRTensor &self,
at::Scalar min_val,
at::Scalar max_val) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhBackwardNode>(
grad_output.GetIrValue(), self.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::_log_softmax(const MLIRTensor &input, int64_t dim,
bool half_to_float) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::LogSoftmaxNode>(
input.GetIrValue(), dim, half_to_float);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::_log_softmax_backward_data(const MLIRTensor &grad_output,
const MLIRTensor &output,
int64_t dim,
const MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::LogSoftmaxBackwardNode>(
grad_output.GetIrValue(), output.GetIrValue(), dim, input.GetIrValue());
return input.CreateFrom(node);
}
std::tuple<MLIRTensor, MLIRTensor> MLIRTensor::max_pool2d_with_indices(
const MLIRTensor &input, at::IntArrayRef kernel_size,
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
bool ceil_mode) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MaxPool2dWithIndicesNode>(
input.GetIrValue(), kernel_size, stride, padding, dilation,
ceil_mode);
auto result0 = input.CreateFrom(ir::Value(node, 0));
auto result1 = input.CreateFrom(ir::Value(node, 1));
return std::make_tuple(result0, result1);
}
MLIRTensor MLIRTensor::max_pool2d_with_indices_backward(
const MLIRTensor &grad_output, const MLIRTensor &input,
at::IntArrayRef kernel_size, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
const MLIRTensor &indices) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MaxPool2dWithIndicesBackwardNode>(
grad_output.GetIrValue(), input.GetIrValue(), kernel_size, stride,
padding, dilation, ceil_mode, indices.GetIrValue());
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::mean(const MLIRTensor &input,
c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MeanNode>(input.GetIrValue(), dtype);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::mean(const MLIRTensor &input, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MeanNode>(input.GetIrValue(), dim, keepdim, dtype);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::mm(const MLIRTensor &input, const MLIRTensor &mat1) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MMNode>(input.GetIrValue(), mat1.GetIrValue());
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::mul(const MLIRTensor &self, const MLIRTensor &other) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::MulNode>(self.GetIrValue(), other.GetIrValue());
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::mul_(MLIRTensor &self, const MLIRTensor &other) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::MulInPlaceNode>(
self.GetIrValue(), other.GetIrValue());
return self.CreateFrom(node);
}
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor> MLIRTensor::native_batch_norm(
const MLIRTensor &self, const MLIRTensor &weight, const MLIRTensor &bias,
const MLIRTensor &running_mean, const MLIRTensor &running_var,
bool training, double momentum, double eps) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::BatchNormNode>(
self.GetIrValue(), weight.GetIrValue(), bias.GetIrValue(),
running_mean.GetIrValue(), running_var.GetIrValue(), training, momentum,
eps);
auto result0 = self.CreateFrom(ir::Value(node, 0));
auto result1 = self.CreateFrom(ir::Value(node, 1));
auto result2 = self.CreateFrom(ir::Value(node, 2));
return std::make_tuple(result0, result1, result2);
}
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
MLIRTensor::native_batch_norm_backward(
const MLIRTensor &grad_out, const MLIRTensor &input,
const MLIRTensor &weight, const MLIRTensor &running_mean,
const MLIRTensor &running_var, const MLIRTensor &save_mean,
const MLIRTensor &save_invstd, bool train, double eps,
std::array<bool, 3> output_mask) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::BatchNormBackwardNode>(
grad_out.GetIrValue(), input.GetIrValue(), weight.GetIrValue(),
running_mean.GetIrValue(), running_var.GetIrValue(),
save_mean.GetIrValue(), save_invstd.GetIrValue(), train, eps,
output_mask);
auto result0 = input.CreateFrom(ir::Value(node, 0));
auto result1 = input.CreateFrom(ir::Value(node, 1));
auto result2 = input.CreateFrom(ir::Value(node, 2));
return std::make_tuple(result0, result1, result2);
}
MLIRTensor MLIRTensor::neg(const MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::NegNode>(input.GetIrValue());
return input.CreateFrom(node);
}
std::tuple<MLIRTensor, MLIRTensor>
MLIRTensor::nll_loss2d_forward(const MLIRTensor &self, const MLIRTensor &target,
const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLoss2dForwardNode>(
self.GetIrValue(), target.GetIrValue(), weight.GetIrValue(), reduction,
ignore_index);
auto result0 = self.CreateFrom(ir::Value(node, 0));
auto result1 = self.CreateFrom(ir::Value(node, 1));
return std::make_tuple(result0, result1);
}
MLIRTensor MLIRTensor::nll_loss2d_backward(
const MLIRTensor &grad_output, const MLIRTensor &self,
const MLIRTensor &target, const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index, const MLIRTensor &total_weight) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLoss2dBackwardNode>(
grad_output.GetIrValue(), self.GetIrValue(), target.GetIrValue(),
weight.GetIrValue(), reduction, ignore_index, total_weight.GetIrValue());
return self.CreateFrom(node);
}
std::tuple<MLIRTensor, MLIRTensor>
MLIRTensor::nll_loss_forward(const MLIRTensor &self, const MLIRTensor &target,
const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLossForwardNode>(
self.GetIrValue(), target.GetIrValue(), weight.GetIrValue(), reduction,
ignore_index);
auto result0 = self.CreateFrom(ir::Value(node, 0));
auto result1 = self.CreateFrom(ir::Value(node, 1));
return std::make_tuple(result0, result1);
}
MLIRTensor MLIRTensor::nll_loss_backward(
const MLIRTensor &grad_output, const MLIRTensor &self,
const MLIRTensor &target, const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index, const MLIRTensor &total_weight) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLossBackwardNode>(
grad_output.GetIrValue(), self.GetIrValue(), target.GetIrValue(),
weight.GetIrValue(), reduction, ignore_index, total_weight.GetIrValue());
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::sum(const MLIRTensor &input, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::SumNode>(input.GetIrValue(), dim, keepdim, dtype);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::relu(const MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::ReLUNode>(input.GetIrValue());
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::relu_(MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::ReLUInPlaceNode>(input.GetIrValue());
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::size(const MLIRTensor &input, int64_t dim) {
assert(0);
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::SizeNode>(input.GetIrValue(), dim);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::squeeze(const MLIRTensor &input, int64_t dim) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::SqueezeNode>(input.GetIrValue(), dim);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::sub(const MLIRTensor &self, const MLIRTensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::SubNode>(
self.GetIrValue(), other.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::sub_(MLIRTensor &self, const MLIRTensor &other,
at::Scalar alpha) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::SubInPlaceNode>(
self.GetIrValue(), other.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
return self.CreateFrom(node);
}
MLIRTensor MLIRTensor::t(const MLIRTensor &input) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::TransposeNode>(input.GetIrValue());
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::threshold_backward(const MLIRTensor &grad_output,
const MLIRTensor &input,
at::Scalar threshold) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node = std::make_shared<ir::ThresholdBackwardNode>(
grad_output.GetIrValue(), input.GetIrValue(),
ir::Value(std::make_shared<ir::ConstantNode>(threshold)));
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::to(MLIRTensor &input, c10::optional<Device> device,
c10::optional<at::ScalarType> scalar_type) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
if (!device) {
device = input.GetDevice();
}
if (!scalar_type) {
scalar_type = input.dtype();
}
MLIRTensor new_tensor = Create(input.ToTensor(), *device);
if (input.dtype() != *scalar_type) {
new_tensor.SetScalarType(*scalar_type);
}
return new_tensor;
}
MLIRTensor MLIRTensor::unsqueeze(const MLIRTensor &input, int64_t dim) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::UnsqueezeNode>(input.GetIrValue(), dim);
return input.CreateFrom(node);
}
MLIRTensor MLIRTensor::view(const MLIRTensor &input, at::IntArrayRef size) {
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
std::shared_ptr<ir::Node> node =
std::make_shared<ir::ViewNode>(input.GetIrValue(), size);
return input.CreateFrom(node);
}
} // namespace torch_mlir

View File

@ -1,275 +0,0 @@
//===- tensor.h -------------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
#include "device.h"
#include "ir.h"
#include <cstdint>
#include <ATen/Tensor.h>
#include <c10/util/ArrayRef.h>
namespace torch_mlir {
class MLIRTensor {
struct Data;
public:
static MLIRTensor Create(const at::Tensor &tensor, const Device &device);
static MLIRTensor Create(ir::Value ir_value, const Device &device,
c10::optional<at::ScalarType> logical_element_type);
MLIRTensor() = default;
bool is_null() const { return data_ptr() == nullptr; }
void ShallowCopyTo(MLIRTensor *dest) const;
void SetTensor(at::Tensor tensor);
void SetIrValue(ir::Value ir_value);
at::ScalarType dtype() const;
// Set logical_element_type which is visible to upstream PyTorch.
void SetScalarType(c10::optional<at::ScalarType> logical_element_type);
std::vector<int64_t> sizes() const;
std::vector<int64_t> strides() const;
at::Tensor ToTensor() const;
const Device &GetDevice() const;
size_t generation() const { return data()->generation; }
std::string GetMLIR() const;
// Retrieves the IR Node representing this MLIRTensor. One will be created if
// missing. Note that although this is a const API, it actually changes the
// internal state of the object.
ir::Value GetIrValue() const;
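// A minimal usage sketch of the lazy behavior described above (the device
// value is hypothetical):
//   MLIRTensor t = MLIRTensor::Create(at::ones({2, 2}), device);
//   ir::Value v = t.GetIrValue();  // materializes an IR value on first use
//   ir::Value w = t.GetIrValue();  // subsequent calls reuse the cached value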
at::Tensor CompileAndRun() const;
uint64_t id() const { return data()->unique_id; }
private:
struct Data {
Data(at::Tensor tensor_data, const Device &device)
: logical_element_type(tensor_data.scalar_type()),
tensor_data(std::move(tensor_data)), device(device),
unique_id(GetNextTensorId()) {}
Data(ir::Value ir_value, const Device &device,
c10::optional<at::ScalarType> logical_element_type)
: logical_element_type(logical_element_type),
ir_value(std::move(ir_value)), device(device),
unique_id(GetNextTensorId()) {}
~Data(){};
c10::optional<at::ScalarType> logical_element_type;
c10::optional<at::Tensor> tensor_data;
ir::Value ir_value;
const Device device;
const uint64_t unique_id = 0;
size_t generation = 1;
};
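// A Data record appears to hold either concrete tensor contents (tensor_data)
// or a lazily built IR value (ir_value); the accessors above convert between
// the two representations on demand.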
MLIRTensor(const at::Tensor &tensor, const Device &device);
MLIRTensor(ir::Value ir_value, const Device &device,
c10::optional<at::ScalarType> logical_element_type = c10::nullopt);
void SetTensorData(at::Tensor tensor_data);
c10::optional<at::Tensor> CurrentTensorData() const;
// Retrieves the current IR value, or a null value when no IR node is
// currently available.
ir::Value CurrentIrValue() const;
Data *data() const;
std::shared_ptr<Data> data_ptr() const { return data_; }
MLIRTensor CreateFrom(ir::Value ir_value) const;
static uint64_t GetNextTensorId();
std::shared_ptr<Data> data_;
//////////////////////////////////////////////////////////////////////////////
// ATen operators follow here, listed in alphabetical order.
//////////////////////////////////////////////////////////////////////////////
public:
static MLIRTensor _adaptive_avg_pool2d(const MLIRTensor &self,
at::IntArrayRef output_size);
static MLIRTensor _adaptive_avg_pool2d_backward(const MLIRTensor &grad_output,
const MLIRTensor &self);
static MLIRTensor add(const MLIRTensor &input, const MLIRTensor &other,
at::Scalar alpha);
static MLIRTensor add_(MLIRTensor &input, const MLIRTensor &other,
at::Scalar alpha);
static MLIRTensor addmm(const MLIRTensor &input, const MLIRTensor &mat1,
const MLIRTensor &mat2, at::Scalar beta,
at::Scalar alpha);
static MLIRTensor as_strided(const MLIRTensor &self, at::IntArrayRef size,
at::IntArrayRef stride,
c10::optional<int64_t> storage_offset);
static MLIRTensor clone(const MLIRTensor &self);
static MLIRTensor convolution(const MLIRTensor &input,
const MLIRTensor &weight,
const MLIRTensor &bias, at::IntArrayRef stride,
at::IntArrayRef padding,
at::IntArrayRef dilation, bool transposed,
at::IntArrayRef output_padding, int64_t groups);
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
convolution_backward(const MLIRTensor &grad_output, const MLIRTensor &input,
const MLIRTensor &weight, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation,
bool transposed, at::IntArrayRef output_padding,
int64_t groups, std::array<bool, 3> output_mask);
static void copy_(MLIRTensor &input, MLIRTensor &src);
static MLIRTensor div(const MLIRTensor &self, at::Scalar other);
static MLIRTensor div(const MLIRTensor &self, const MLIRTensor &other);
static MLIRTensor div_(MLIRTensor &self, const MLIRTensor &other);
static MLIRTensor expand(const MLIRTensor &self, at::IntArrayRef size,
bool implicit);
static MLIRTensor gather(const MLIRTensor &self, int64_t dim,
const MLIRTensor &index, bool sparse_grad);
static MLIRTensor hardtanh(const MLIRTensor &self, at::Scalar min_val,
at::Scalar max_val);
static MLIRTensor hardtanh_(MLIRTensor &self, at::Scalar min_val,
at::Scalar max_val);
static MLIRTensor hardtanh_backward(const MLIRTensor &grad_output,
const MLIRTensor &self,
at::Scalar min_val, at::Scalar max_val);
static MLIRTensor _log_softmax(const MLIRTensor &input, int64_t dim,
bool half_to_float);
static MLIRTensor _log_softmax_backward_data(const MLIRTensor &grad_output,
const MLIRTensor &output,
int64_t dim,
const MLIRTensor &self);
static std::tuple<MLIRTensor, MLIRTensor>
max_pool2d_with_indices(const MLIRTensor &input, at::IntArrayRef kernel_size,
at::IntArrayRef stride, at::IntArrayRef padding,
at::IntArrayRef dilation, bool ceil_mode);
static MLIRTensor max_pool2d_with_indices_backward(
const MLIRTensor &grad_output, const MLIRTensor &self,
at::IntArrayRef kernel_size, at::IntArrayRef stride,
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
const MLIRTensor &indices);
static MLIRTensor mean(const MLIRTensor &input,
c10::optional<at::ScalarType> dtype);
static MLIRTensor mean(const MLIRTensor &input, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype);
static MLIRTensor mm(const MLIRTensor &input, const MLIRTensor &mat1);
static MLIRTensor mul(const MLIRTensor &self, const MLIRTensor &other);
static MLIRTensor mul_(MLIRTensor &self, const MLIRTensor &other);
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
native_batch_norm(const MLIRTensor &input, const MLIRTensor &weight,
const MLIRTensor &bias, const MLIRTensor &running_mean,
const MLIRTensor &running_var, bool training,
double momentum, double eps);
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
native_batch_norm_backward(const MLIRTensor &grad_out,
const MLIRTensor &input, const MLIRTensor &weight,
const MLIRTensor &running_mean,
const MLIRTensor &running_var,
const MLIRTensor &save_mean,
const MLIRTensor &save_invstd, bool train,
double eps, std::array<bool, 3> output_mask);
static MLIRTensor neg(const MLIRTensor &input);
static std::tuple<MLIRTensor, MLIRTensor>
nll_loss2d_forward(const MLIRTensor &self, const MLIRTensor &target,
const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index);
static MLIRTensor nll_loss2d_backward(const MLIRTensor &grad_output,
const MLIRTensor &self,
const MLIRTensor &target,
const MLIRTensor &weight,
int64_t reduction, int64_t ignore_index,
const MLIRTensor &total_weight);
static std::tuple<MLIRTensor, MLIRTensor>
nll_loss_forward(const MLIRTensor &self, const MLIRTensor &target,
const MLIRTensor &weight, int64_t reduction,
int64_t ignore_index);
static MLIRTensor nll_loss_backward(const MLIRTensor &grad_output,
const MLIRTensor &self,
const MLIRTensor &target,
const MLIRTensor &weight,
int64_t reduction, int64_t ignore_index,
const MLIRTensor &total_weight);
static MLIRTensor size(const MLIRTensor &self, int64_t dim);
static MLIRTensor squeeze(const MLIRTensor &self, int64_t dim);
static MLIRTensor sub(const MLIRTensor &input, const MLIRTensor &other,
at::Scalar alpha);
static MLIRTensor sub_(MLIRTensor &input, const MLIRTensor &other,
at::Scalar alpha);
static MLIRTensor sum(const MLIRTensor &self, at::IntArrayRef dim,
bool keepdim, c10::optional<at::ScalarType> dtype);
static MLIRTensor relu(const MLIRTensor &input);
static MLIRTensor relu_(MLIRTensor &input);
static MLIRTensor t(const MLIRTensor &input);
static MLIRTensor threshold_backward(const MLIRTensor &grad_output,
const MLIRTensor &self,
at::Scalar threshold);
static MLIRTensor to(MLIRTensor &input, c10::optional<Device> device,
c10::optional<at::ScalarType> scalar_type);
static MLIRTensor unsqueeze(const MLIRTensor &self, int64_t dim);
static MLIRTensor view(const MLIRTensor &input, at::IntArrayRef size);
};
} // namespace torch_mlir

View File

@ -1,156 +0,0 @@
//===- tensor_impl.cpp ------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#include "tensor_impl.h"
#include "aten_mlir_bridge.h"
#include <c10/core/impl/DeviceGuardImplInterface.h>
#include <c10/macros/Macros.h>
namespace torch_mlir {
namespace {
thread_local c10::Device g_current_device(at::DeviceType::XLA, 0);
struct MLIRGuardImpl : public c10::impl::DeviceGuardImplInterface {
at::DeviceType type() const override { return at::DeviceType::XLA; }
c10::Device exchangeDevice(c10::Device device) const override {
std::swap(g_current_device, device);
return device;
}
c10::Device getDevice() const override { return g_current_device; }
void setDevice(c10::Device device) const override {
g_current_device = device;
}
void uncheckedSetDevice(c10::Device device) const noexcept override {
g_current_device = device;
}
c10::Stream getStream(c10::Device device) const noexcept override {
return c10::Stream(c10::Stream::DEFAULT, device);
}
c10::Stream exchangeStream(c10::Stream s) const noexcept override {
return c10::Stream(c10::Stream::DEFAULT, g_current_device);
}
c10::DeviceIndex deviceCount() const noexcept override { return 0; }
};
C10_REGISTER_GUARD_IMPL(XLA, MLIRGuardImpl);
} // namespace
MLIRTensorImpl::MLIRTensorImpl(MLIRTensor tensor)
: c10::TensorImpl(c10::XLATensorId(), GetTypeMeta(tensor),
bridge::MLIRDeviceToAtenDevice(tensor.GetDevice())),
tensor_(std::move(tensor)) {}
c10::intrusive_ptr<c10::TensorImpl> MLIRTensorImpl::shallow_copy_and_detach(
const c10::VariableVersion &version_counter,
bool allow_tensor_metadata_change) const {
// std::cout << "MLIRTensorImpl::" << __func__ << std::endl;
auto impl = c10::make_intrusive<MLIRTensorImpl>(tensor_);
copy_tensor_metadata(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/version_counter,
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
return impl;
}
void MLIRTensorImpl::shallow_copy_from(
const c10::intrusive_ptr<TensorImpl> &impl) {
// std::cout << "MLIRTensorImpl::" << __func__ << std::endl;
MLIRTensorImpl *tensor_impl = dynamic_cast<MLIRTensorImpl *>(impl.get());
copy_tensor_metadata(
/*src_impl=*/tensor_impl,
/*dest_impl=*/this,
/*version_counter=*/version_counter(),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
tensor_impl->tensor_.ShallowCopyTo(&tensor_);
generation_ = 0;
}
at::IntArrayRef MLIRTensorImpl::sizes() const {
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
return c10::TensorImpl::sizes();
}
at::IntArrayRef MLIRTensorImpl::strides() const {
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
return c10::TensorImpl::strides();
}
int64_t MLIRTensorImpl::dim() const {
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
return c10::TensorImpl::dim();
}
int64_t MLIRTensorImpl::numel() const {
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
return c10::TensorImpl::numel();
}
bool MLIRTensorImpl::is_contiguous(at::MemoryFormat memory_format) const {
// Only check that the storage is already contiguous.
assert(is_contiguous_ && "Non-contiguous storage for MLIR tensor");
return true;
}
int64_t MLIRTensorImpl::size(int64_t d) const {
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
return c10::TensorImpl::size(d);
}
void MLIRTensorImpl::SetupSizeProperties() {
size_t generation = tensor_.generation();
if (generation != generation_) {
// Fill up the basic dimension data members which the base class
// implementation uses in its APIs.
auto sizes = tensor_.sizes();
auto strides = tensor_.strides();
strides_.clear();
sizes_.clear();
numel_ = 1;
for (auto t : llvm::zip(sizes, strides)) {
auto size = std::get<0>(t);
sizes_.push_back(size);
strides_.push_back(std::get<1>(t));
numel_ *= size;
}
generation_ = generation;
}
}
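// Sketch of the caching contract above (usage hypothetical): size queries are
// cheap because the cached dimension data is only rebuilt when the wrapped
// tensor's generation counter has advanced.
//   MLIRTensorImpl impl(tensor);
//   impl.sizes(); // rebuilds sizes_/strides_/numel_ on first use
//   impl.sizes(); // reuses the cache while tensor.generation() is unchanged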
caffe2::TypeMeta MLIRTensorImpl::GetTypeMeta(const MLIRTensor &tensor) {
return c10::scalarTypeToTypeMeta(tensor.dtype());
}
c10::Device MLIRTensorImpl::GetCurrentAtenDevice() { return g_current_device; }
c10::Device MLIRTensorImpl::SetCurrentAtenDevice(c10::Device device) {
std::swap(g_current_device, device);
return device;
}
void MLIRTensorImpl::AtenInitialize() {}
const at::Storage &MLIRTensorImpl::storage() const {
assert(0 && "MLIR tensors do not have storage");
}
bool MLIRTensorImpl::has_storage() const { return false; }
} // namespace torch_mlir

View File

@ -1,60 +0,0 @@
//===- tensor_impl.h --------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
#include "tensor.h"
#include <ATen/Tensor.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorImpl.h>
namespace torch_mlir {
class MLIRTensorImpl : public c10::TensorImpl {
public:
explicit MLIRTensorImpl(MLIRTensor tensor);
MLIRTensor &tensor() { return tensor_; }
c10::intrusive_ptr<TensorImpl>
shallow_copy_and_detach(const c10::VariableVersion &version_counter,
bool allow_tensor_metadata_change) const override;
void shallow_copy_from(const c10::intrusive_ptr<TensorImpl> &impl) override;
at::IntArrayRef sizes() const override;
at::IntArrayRef strides() const override;
int64_t dim() const override;
int64_t numel() const override;
bool is_contiguous(at::MemoryFormat memory_format) const override;
int64_t size(int64_t d) const override;
static c10::Device GetCurrentAtenDevice();
static c10::Device SetCurrentAtenDevice(c10::Device device);
static void AtenInitialize();
const at::Storage &storage() const override;
bool has_storage() const override;
private:
static caffe2::TypeMeta GetTypeMeta(const MLIRTensor &tensor);
void SetupSizeProperties();
MLIRTensor tensor_;
size_t generation_ = 0;
};
} // namespace torch_mlir

View File

@ -1,44 +0,0 @@
//===- torch_util.cpp -------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#include "torch_util.h"
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
namespace torch_mlir {
namespace util {
at::Tensor Zeros(at::IntArrayRef sizes, at::ScalarType type) {
return at::zeros(sizes, type);
}
at::Tensor CopyTensor(const at::Tensor &ref) {
return ref.to(ref.options(), /*non_blocking=*/false, /*copy=*/true);
}
// Same as above, with an additional cast.
at::Tensor CopyTensor(const at::Tensor &ref, at::ScalarType dest_type) {
return ref.to(ref.options().dtype(dest_type), /*non_blocking=*/false,
/*copy=*/true);
}
at::ScalarType GetScalarType(at::Scalar scalar) {
if (scalar.isFloatingPoint()) {
return at::kDouble;
} else if (scalar.isIntegral(/*includeBool=*/false)) {
return at::kLong;
} else if (scalar.isBoolean()) {
return at::kBool;
} else if (scalar.isComplex()) {
return at::kComplexDouble;
}
assert(0 && "Unknown type for scalar");
}
} // namespace util
} // namespace torch_mlir

View File

@ -1,34 +0,0 @@
//===- torch_util.h ---------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#pragma once
#include <ATen/Tensor.h>
#include <c10/core/ScalarType.h>
#include <c10/util/Optional.h>
namespace torch_mlir {
namespace util {
at::Tensor Zeros(at::IntArrayRef sizes, at::ScalarType type);
// Makes a deep copy of an ATen tensor.
at::Tensor CopyTensor(const at::Tensor &ref);
// Same as above, with an additional cast.
at::Tensor CopyTensor(const at::Tensor &ref, at::ScalarType dest_type);
// Returns the at::ScalarType corresponding to an at::Scalar.
at::ScalarType GetScalarType(at::Scalar scalar);
template <typename T, typename S>
T OptionalOr(const c10::optional<S> &value, T defval) {
return value ? static_cast<T>(*value) : defval;
}
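// Usage sketch (values hypothetical): fall back to a default when the
// optional is empty.
//   c10::optional<int64_t> storage_offset;                    // not set
//   int64_t offset = OptionalOr<int64_t>(storage_offset, 0);  // yields 0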
} // namespace util
} // namespace torch_mlir

View File

@ -1,10 +0,0 @@
include_directories(
${TORCH_INCLUDE_DIRS}
)
add_library(aten_ops SHARED
aten_ops.cpp
)
target_link_libraries(aten_ops
${TORCH_LIBRARIES}
)

View File

@ -1,772 +0,0 @@
//===- aten_ops.cpp ---------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// This file implements C libraries that are targeted by MLIR code generation
// from the ATen dialect. The library is intended to support a functional
// proof of concept rather than to be optimized for high performance. Most of
// the functions are implemented by calling back into the torch libraries.
#include <assert.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <ATen/ATen.h>
#include <torch/torch.h>
#include <nnpack.h>
#include <ATen/CPUType.h>
namespace {
template <typename T, int N> struct tensor_t {
T *d;
T *aligned;
size_t offset;
size_t shape[N];
size_t stride[N];
size_t index(size_t n, size_t channel, size_t row, size_t col) const {
size_t channels = shape[1];
size_t height = shape[2];
size_t width = shape[3];
return n * height * width * channels + channel * height * width +
row * width + col;
}
tensor_t() {
d = aligned = nullptr;
offset = 0;
for (int i = 0; i < N; i++)
shape[i] = stride[i] = 0;
}
};
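// tensor_t<T, N> follows the layout of the MLIR memref descriptor passed
// across the C interface by the _mlir_ciface_* entry points below: allocated
// pointer, aligned pointer, offset, then N sizes and N strides. A minimal
// sketch of filling one by hand for row-major 2x3 float data (values
// hypothetical):
//   float data[6] = {0, 1, 2, 3, 4, 5};
//   tensor_t<float, 2> t;
//   t.d = t.aligned = data;
//   t.offset = 0;
//   t.shape[0] = 2;  t.shape[1] = 3;
//   t.stride[0] = 3; t.stride[1] = 1;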
template <typename T, int N>
std::vector<int64_t> translate_shape(tensor_t<T, N> *t) {
std::vector<int64_t> shape;
for (int i = 0; i < N; i++) {
shape.push_back(t->shape[i]);
// std::cout << i << " shape " << t->shape[i] << std::endl;
}
return shape;
}
template <typename T, int N>
std::vector<int64_t> translate_stride(tensor_t<T, N> *t) {
std::vector<int64_t> stride;
for (int i = 0; i < N; i++) {
stride.push_back(t->stride[i]);
// std::cout << i << " stride " << t->stride[i] << std::endl;
}
return stride;
}
template <int N> void dumpTensor(std::ostream &o, tensor_t<float, N> *t) {
o << "Shape:";
for (int i = 0; i < N; i++)
o << t->shape[i] << " ";
o << "Stride:";
for (int i = 0; i < N; i++)
o << t->stride[i] << " ";
o << "\n";
}
template <typename T, int N>
at::Tensor to_torch(tensor_t<T, N> *t,
const at::TensorOptions &options = at::TensorOptions()) {
// std::cout << "to_torch\n";
return torch::from_blob((void *)t->d, translate_shape(t), translate_stride(t),
options);
}
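// Usage sketch (descriptor `t` as filled in the sketch above, values
// hypothetical): to_torch wraps the raw buffer as an at::Tensor view over the
// same memory, without copying, so the helpers below can call straight back
// into ATen.
//   at::Tensor view = to_torch(&t);       // 2x3 float tensor over `data`
//   at::Tensor doubled = view.mul(2.0f);  // ordinary ATen call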
template <typename T>
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r);
template <typename T, int N>
void add_out(tensor_t<T, N> *a, tensor_t<T, N> *b, T alpha, tensor_t<T, N> *r) {
at::Tensor torch_a = to_torch(a);
at::Tensor torch_b = to_torch(b);
at::Tensor result = at::native::add(torch_a, torch_b, alpha).clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void addmm_out(tensor_t<T, 1> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *c,
int32_t alpha, int32_t beta, tensor_t<T, 2> *r) {
at::Tensor torch_a = to_torch(a);
at::Tensor torch_b = to_torch(b);
at::Tensor torch_c = to_torch(c);
at::Tensor result =
at::native::addmm(torch_a, torch_b, torch_c, alpha, beta).clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N, int M>
void as_strided_out(tensor_t<float, M> *a,
/*size*/ int32_t sz0, int32_t sz1, int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1, int32_t sd2,
int32_t sd3, int32_t offset, tensor_t<T, N> *r) {
at::Tensor input = to_torch(a);
std::vector<int64_t> size;
std::vector<int64_t> stride;
c10::optional<int64_t> storage_offset;
if (offset != 0)
storage_offset = offset;
if (N > 0) {
size.push_back(sz0);
stride.push_back(sd0);
}
if (N > 1) {
size.push_back(sz1);
stride.push_back(sd1);
}
if (N > 2) {
size.push_back(sz2);
stride.push_back(sd2);
}
if (N > 3) {
size.push_back(sz3);
stride.push_back(sd3);
}
std::vector<int64_t> sizeRef{size};
std::vector<int64_t> strideRef{stride};
// for (int i = 0; i<N; i++)
// std::cout << "STRIDE " << i << " " << stride[i] << std::endl;
at::Tensor result =
at::native::as_strided_tensorimpl(input, size, stride, storage_offset)
.clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
// FIXME: stride, padding, dilation, output_padding should be IntArrayRef
template <typename T>
void conv2d_out(tensor_t<T, 4> *t, tensor_t<T, 4> *weight, tensor_t<T, 1> *bias,
int32_t stride, int32_t pad, int32_t dilation,
tensor_t<T, 4> *r) {
at::Tensor torch_t = to_torch(t);
at::Tensor torch_w = to_torch(weight);
at::Tensor torch_b = to_torch(bias);
int64_t groups = 1;
at::Tensor result = at::native::conv2d(torch_t, torch_w, torch_b, stride, pad,
dilation, groups)
.clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void conv2d_backward_out(tensor_t<T, 4> *grad_output, tensor_t<T, 4> *input,
tensor_t<T, 4> *weight, int32_t stride, int32_t pad,
int32_t dilation, tensor_t<T, 4> *r0,
tensor_t<T, 4> *r1, tensor_t<T, 1> *r2) {
const at::Tensor &arg_grad = to_torch(grad_output);
const at::Tensor &arg_input = to_torch(input);
const at::Tensor &arg_weight = to_torch(weight);
std::vector<int64_t> p{pad, pad};
std::vector<int64_t> s{stride, stride};
std::vector<int64_t> d{dilation, dilation};
std::array<bool, 3> output_mask{true, true, true};
std::tuple<at::Tensor, at::Tensor, at::Tensor> grads =
at::native::mkldnn_convolution_backward(arg_input, arg_grad, arg_weight,
p, s, d, 1, output_mask);
auto result0 = std::get<0>(grads);
auto result1 = std::get<1>(grads);
auto result2 = std::get<2>(grads);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
memcpy(r2->d, result2.data_ptr(), result2.numel() * sizeof(T));
}
template <typename T, int N>
void log_softmax_out(tensor_t<T, N> *t, int32_t dim, bool half_to_float,
tensor_t<T, N> *r) {
at::Tensor input = to_torch(t);
at::Tensor result = at::native::log_softmax_cpu(input, dim, half_to_float);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void log_softmax_backward_data_out(tensor_t<T, N> *a, tensor_t<T, N> *b,
int32_t c, tensor_t<T, N> *d,
tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor inputD = to_torch(d);
at::Tensor result =
at::native::log_softmax_backward_cpu(inputA, inputB, c, inputD);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void max_pool2d_with_indices_out(tensor_t<T, 4> *t, int32_t c, int32_t d,
int32_t e, int32_t f, bool ceil_mode,
tensor_t<T, 4> *r0, tensor_t<int64_t, 4> *r1) {
at::Tensor input = to_torch(t);
std::vector<int64_t> kernel{c, c};
std::vector<int64_t> stride{d, d};
std::vector<int64_t> padding{e, e};
std::vector<int64_t> dilation{f, f};
auto result = at::native::max_pool2d_with_indices_cpu(
input, kernel, stride, padding, dilation, ceil_mode);
at::Tensor outTensor = std::get<0>(result);
at::Tensor idxTensor = std::get<1>(result);
memcpy(r0->d, outTensor.data_ptr(), outTensor.numel() * sizeof(T));
memcpy(r1->d, idxTensor.data_ptr(),
idxTensor.numel() * sizeof(int64_t)); // the index tensor holds int64 values
}
template <typename T>
void max_pool2d_with_indices_backward_out(tensor_t<T, 4> *a, tensor_t<T, 4> *b,
int32_t c, int32_t d, int32_t e,
int32_t f, bool g,
tensor_t<int64_t, 4> *h,
tensor_t<T, 4> *r) {
const at::Tensor &inputA = to_torch(a);
const at::Tensor &inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
const at::Tensor &inputH = to_torch(h, options);
std::vector<int64_t> kernel{c, c};
std::vector<int64_t> stride{d, d};
std::vector<int64_t> padding{e, e};
std::vector<int64_t> dilation{f, f};
at::Tensor result = at::native::max_pool2d_with_indices_backward_cpu(
inputA, inputB, kernel, stride, padding, dilation, g, inputH);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = inputA.matmul(inputB);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void mul_out(tensor_t<T, N> *a, tensor_t<T, N> *b, tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = at::native::mul(inputA, inputB);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void relu_out(tensor_t<T, N> *a, tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor result = at::native::relu(inputA);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T> void t_out(tensor_t<T, 2> *a, tensor_t<T, 2> *r) {
size_t h = a->shape[0];
size_t w = a->shape[1];
for (size_t i = 0; i < h; i++)
for (size_t j = 0; j < w; j++)
r->d[j * h + i] = a->d[i * w + j];
}
template <typename T, int N>
void threshold_backward_out(tensor_t<T, N> *a, tensor_t<T, N> *b, int32_t c,
tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = at::native::threshold_backward(inputA, inputB, c);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N, int M>
void view_out(tensor_t<T, M> *a, int32_t b, int32_t c, int32_t d, int32_t e,
tensor_t<T, N> *r) {
tensor_t<T, N> result;
size_t numel = 1;
for (size_t i = 0; i < M; i++)
numel *= a->shape[i];
if (N == 1)
c = d = e = 1;
if (N == 2)
d = e = 1;
if (N == 3)
e = 1;
int inferred = 0;
if (b == -1)
inferred++;
if (c == -1)
inferred++;
if (d == -1)
inferred++;
if (e == -1)
inferred++;
assert(inferred <= 1 &&
"aten.view Error: only one dimension can be inferred");
if (b == -1)
b = numel / (c * d * e);
if (c == -1)
c = numel / (b * d * e);
if (d == -1)
d = numel / (b * c * e);
if (e == -1)
e = numel / (b * c * d);
if (N > 0)
r->shape[0] = b;
if (N > 1)
r->shape[1] = c;
if (N > 2)
r->shape[2] = d;
if (N > 3)
r->shape[3] = e;
memcpy(r->d, a->d, numel * sizeof(T));
}
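// Shape-inference sketch for the -1 handling above (values hypothetical):
// viewing 24 source elements as (-1, 12) with N == 2 forces d = e = 1, so the
// first dimension is inferred as numel / (c * d * e) = 24 / 12 = 2.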
} // namespace
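// The exported symbols below follow the MLIR C-interface convention
// (_mlir_ciface_ prefix); the remaining suffix appears to encode the rank and
// element type of each memref argument. A plausible reading of
// _mlir_ciface_add_2F32_2F32_2F32_out: two rank-2 f32 inputs plus a rank-2
// f32 output buffer.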
extern "C" {
// add_out
void _mlir_ciface_add_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b, int32_t i,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 1>(a, b, i, r);
}
void _mlir_ciface_add_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b, int32_t i,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 2>(a, b, i, r);
}
void _mlir_ciface_add_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b, int32_t i,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 3>(a, b, i, r);
}
void _mlir_ciface_add_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b, int32_t i,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 4>(a, b, i, r);
}
// addmm_out
void _mlir_ciface_addmm_2F32_1F32_2F32_2F32_out(tensor_t<float, 1> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *c,
int32_t alpha, int32_t beta,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
addmm_out<float>(a, b, c, alpha, beta, r);
}
// as_strided_out
void _mlir_ciface_as_strided_1F32_1F32_out(tensor_t<float, 1> *a,
/*size*/ int32_t sz0, int32_t sz1,
int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1,
int32_t sd2, int32_t sd3,
int32_t offset,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
as_strided_out<float, 1, 1>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
r);
}
void _mlir_ciface_as_strided_4F32_2F32_out(tensor_t<float, 2> *a,
/*size*/ int32_t sz0, int32_t sz1,
int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1,
int32_t sd2, int32_t sd3,
int32_t offset,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
// std::cout << sz0 << " "
// << sz1 << " "
// << sz2 << " "
// << sz3 << "\n";
// std::cout << sd0 << " "
// << sd1 << " "
// << sd2 << " "
// << sd3 << "\n";
as_strided_out<float, 4, 2>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
r);
}
// conv2d_out
void _mlir_ciface_conv2d_4F32_4F32_4F32_1F32_out(
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
}
void _mlir_ciface_conv2d_relu_4F32_4F32_4F32_1F32_out(
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
relu_out<float, 4>(r, r);
}
// conv2d_backward_out
void _mlir_ciface_conv2d_backward_4F32_4F32_1F32_4F32_4F32_4F32_out(
tensor_t<float, 4> *grad_output, tensor_t<float, 4> *t,
tensor_t<float, 4> *weight, int32_t stride, int32_t padding,
int32_t dilation, tensor_t<float, 4> *r0, tensor_t<float, 4> *r1,
tensor_t<float, 1> *r2) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_backward_out<float>(grad_output, t, weight, stride, padding, dilation,
r0, r1, r2);
}
// div
float *div_0F32_0F32_0F32(float *a, float *b) {
// std::cout << "aten_ops " << __func__ << "\n";
float *ret = (float *)malloc(sizeof(float));
*ret = *a / *b;
return ret;
}
// log_softmax_out
void _mlir_ciface_log_softmax_1F32_1F32_out(tensor_t<float, 1> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 1>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_2F32_2F32_out(tensor_t<float, 2> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 2>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_3F32_3F32_out(tensor_t<float, 3> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 3>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_4F32_4F32_out(tensor_t<float, 4> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 4>(t, dim, half_to_float, r);
}
// log_softmax_backward_data_out
void _mlir_ciface_log_softmax_backward_data_2F32_2F32_2F32_2F32_out(
tensor_t<float, 2> *a, tensor_t<float, 2> *b, int32_t c,
tensor_t<float, 2> *d, tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_backward_data_out<float, 2>(a, b, c, d, r);
}
void _mlir_ciface_log_softmax_backward_data_4F32_4F32_4F32_4F32_out(
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c,
tensor_t<float, 4> *d, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_backward_data_out<float, 4>(a, b, c, d, r);
}
// max_pool2d_out
void _mlir_ciface_max_pool2d_with_indices_4F32_4I64_4F32_out(
tensor_t<float, 4> *t, int32_t kernel, int32_t pad, int32_t stride,
int32_t dilation, bool ceil_mode, tensor_t<float, 4> *r0,
tensor_t<int64_t, 4> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
max_pool2d_with_indices_out<float>(t, kernel, pad, stride, dilation,
ceil_mode, r0, r1);
}
// max_pool2d backward_out
void _mlir_ciface_max_pool2d_with_indices_backward_4F32_4F32_4F32_4I64_out(
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c, int32_t d,
int32_t e, int32_t f, bool g, tensor_t<int64_t, 4> *h,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
max_pool2d_with_indices_backward_out<float>(a, b, c, d, e, f, g, h, r);
}
// mm_out
void _mlir_ciface_mm_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mm_out<float>(a, b, r);
}
// mul_out
void _mlir_ciface_mul_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 1>(a, b, r);
}
void _mlir_ciface_mul_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 2>(a, b, r);
}
void _mlir_ciface_mul_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 3>(a, b, r);
}
void _mlir_ciface_mul_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 4>(a, b, r);
}
// nll_loss2d_forward_out
void _mlir_ciface_nll_loss2d_forward_1F32_1F32_4F32_3I64_1F32_out(
tensor_t<float, 4> *a, tensor_t<uint64_t, 3> *b, tensor_t<float, 1> *c,
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputB = to_torch(b, options);
at::Tensor inputC = to_torch(c);
std::tuple<at::Tensor, at::Tensor> result =
at::CPUType::nll_loss2d_forward(inputA, inputB, inputC, d, e);
at::Tensor result0 = std::get<0>(result);
at::Tensor result1 = std::get<1>(result);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
}
// nll_loss2d_backward_out
void _mlir_ciface_nll_loss2d_backward_4F32_1F32_4F32_3I64_1F32_1F32_out(
tensor_t<float, 1> *a, tensor_t<float, 4> *b, tensor_t<uint64_t, 3> *c,
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputC = to_torch(c, options);
at::Tensor inputD = to_torch(d);
at::Tensor inputG = to_torch(g);
at::Tensor result = at::CPUType::nll_loss2d_backward(inputA, inputB, inputC,
inputD, e, f, inputG);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
void _mlir_ciface_nll_loss_backward_2F32_1F32_2F32_1I64_1F32_1F32_out(
tensor_t<float, 1> *a, tensor_t<float, 2> *b, tensor_t<uint64_t, 1> *c,
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputC = to_torch(c, options);
at::Tensor inputD = to_torch(d);
at::Tensor inputG = to_torch(g);
at::Tensor result = at::CPUType::nll_loss_backward(inputA, inputB, inputC,
inputD, e, f, inputG);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
// nll_loss_forward_out
void _mlir_ciface_nll_loss_forward_1F32_1F32_2F32_1I64_1F32_out(
tensor_t<float, 2> *a, tensor_t<uint64_t, 1> *b, tensor_t<float, 1> *c,
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputB = to_torch(b, options);
at::Tensor inputC = to_torch(c);
std::tuple<at::Tensor, at::Tensor> result =
at::CPUType::nll_loss_forward(inputA, inputB, inputC, d, e);
at::Tensor result0 = std::get<0>(result);
at::Tensor result1 = std::get<1>(result);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
}
// relu_out
void _mlir_ciface_relu_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 1>(a, r);
}
void _mlir_ciface_relu_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 2>(a, r);
}
void _mlir_ciface_relu_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 3>(a, r);
}
void _mlir_ciface_relu_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 4>(a, r);
}
// t_out
void _mlir_ciface_t_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
t_out<float>(a, r);
}
// threshold_backward_out
void _mlir_ciface_threshold_backward_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b,
int32_t c,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 1>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
int32_t c,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 2>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b,
int32_t c,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 3>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b,
int32_t c,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 4>(a, b, c, r);
}
// view_out
void _mlir_ciface_view_1F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 4>(a, b, c, d, e, r);
}
void _mlir_ciface_view_1F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 3>(a, b, c, d, e, r);
}
void _mlir_ciface_view_1F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 2>(a, b, c, d, e, r);
}
void _mlir_ciface_view_2F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 2, 4>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_1F32_out(tensor_t<float, 1> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 1>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 2>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 3>(a, b, c, d, e, r);
}
}

View File

@ -1,3 +0,0 @@
# TODO: Enable these tests for the new c10 dispatch code path with pt > 1.3
if config.enable_c10_dispatch:
config.unsupported = True

View File

@ -1,78 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import unittest
from unittest import TestCase
import torch
import torch.nn as nn
import torch.nn.functional as F
import npcomp.frontends.pytorch as torch_mlir
import inspect
# RUN: %PYTHON %s | FileCheck %s
class ResA(nn.Module):
def __init__(self, channels):
C = int(channels)
C2 = int(channels/2)
super(ResA, self).__init__()
self.model = nn.Sequential(# A1
nn.BatchNorm2d(C),
nn.ReLU(),
nn.Conv2d(C,C2,1,stride=1,padding=0,dilation=1,groups=1,bias=True),
# B1
nn.BatchNorm2d(C2),
nn.ReLU(),
nn.Conv2d(C2,C2,3,stride=1,padding=1,dilation=1,groups=1,bias=True),
# C1
nn.BatchNorm2d(C2),
nn.ReLU(),
nn.Conv2d(C2,C,1,stride=1,padding=0,dilation=1,groups=1,bias=True))
def forward(self, x):
res = self.model.forward(x)
return x + res
# Prints `str` prefixed by the current test function name so we can use it in
# FileCheck label directives.
# This is achieved by inspecting the stack and taking the name two frames up.
def printWithCurrentFunctionName(s):
# stack[1] is the caller, i.e. "_test_model"
# stack[2] is the caller's caller, e.g. "test_conv_1"
print(inspect.stack()[2][3], s)
class TestMLIRExport(unittest.TestCase):
def setUp(self):
pass
def _test_model(self, model, model_args):
result = model(model_args)
mlir = torch_mlir.get_mlir(result)
printWithCurrentFunctionName (mlir)
return True
def test_ResA_16(self):
dev = torch_mlir.mlir_device()
model = ResA(16).to(dev)
passed = self._test_model(model, torch.ones((1,16,128,128), device=dev))
# CHECK-LABEL: test_ResA_16
# CHECK: [[V0:%[a-zA-Z0-9]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"({{.*}}) {layer_name = "L0-native_batch_norm-0"}
# CHECK: [[V1:%[a-zA-Z0-9]+]] = "aten.relu"([[V0]]) {layer_name = "L1-relu-0"}
# CHECK: [[V2:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V1]], {{.*}}) {layer_name = "L2-convolution_overrideable-0"}
# CHECK: [[V3:%[a-zA-Z0-9_]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"([[V2]]{{.*}}) {layer_name = "L3-native_batch_norm-1"}
# CHECK: [[V4:%[a-zA-Z0-9]+]] = "aten.relu"([[V3]]) {layer_name = "L4-relu-1"}
# CHECK: [[V5:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V4]],{{.*}}) {layer_name = "L5-convolution_overrideable-1"}
# CHECK: [[V6:%[a-zA-Z0-9_]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"([[V5]],{{.*}}) {layer_name = "L6-native_batch_norm-2"}
# CHECK: [[V7:%[a-zA-Z0-9]+]] = "aten.relu"([[V6]]) {layer_name = "L7-relu-2"}
# CHECK: [[V8:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V7]],{{.*}}) {layer_name = "L8-convolution_overrideable-2"}
# CHECK: {{.*}} = "aten.add"(%arg0, [[V8]], {{.*}}) {layer_name = "L9-add-0"}
self.assertTrue(passed)
verbose = False
if __name__ == '__main__':
verbose = True
unittest.main()

View File

@ -1,26 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((1,2,3,4), device=dev)
t1 = torch.randn((1,2,3,4), device=dev)
t2 = torch.randn((1,2,3,4), device=dev)
t3 = t0 + t1 + t2
#
# Generate and check the MLIR for the result tensor
#
t3_mlir = torch_mlir.get_mlir( t3 )
# CHECK-LABEL: test_export_add3
# CHECK: %1 = "aten.add"(%arg0, %arg1, %0) {layer_name = "L0-add-0"} : (tensor<1x2x3x4xf32>, tensor<1x2x3x4xf32>, i32) -> tensor<1x2x3x4xf32>
# CHECK: %2 = "aten.add"(%1, %arg2, %0) {layer_name = "L1-add-1"} : (tensor<1x2x3x4xf32>, tensor<1x2x3x4xf32>, i32) -> tensor<1x2x3x4xf32>
print("test_export_add3")
print(t3_mlir)

View File

@ -1,19 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
model = torch.nn.BatchNorm2d(123).to(dev)
result = model(torch.ones(42,123,4,5).to(dev))
# CHECK-LABEL: test_export_batchnorm
# CHECK: aten.native_batch_norm
mlir = torch_mlir.get_mlir( result )
print("test_export_batchnorm")
print(mlir)

View File

@ -1,49 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
N = 3
Cin = 16
Cout = 4
w = 10
h = 10
model = torch.nn.Conv2d(Cin, Cout, (3,3))
ref_model = torch.nn.Conv2d(Cin, Cout, (3,3))
ref_model.weight.data = model.weight.clone()
ref_model.bias.data = model.bias.clone()
model = model.to(dev)
softmax = torch.nn.LogSoftmax(dim=1)
loss = torch.nn.NLLLoss()
tensor = torch.randn(N, Cin, h, w, device=dev)
result = model(tensor)
# CHECK-LABEL: test_export_conv2d
# CHECK: aten.convolution_overrideable
print("test_export_conv2d")
print(torch_mlir.get_mlir( result ))
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, Cout)
ref_target = target.clone()
target = target.to(dev)
test_loss = loss( softmax(result), target )
test_loss.backward()
# CHECK-LABEL: test_export_conv2d_back
# CHECK: aten.convolution_overrideable
# CHECK: aten._log_softmax
# CHECK: aten.nll_loss2d_forward
print("test_export_conv2d_back")
print(torch_mlir.get_mlir( test_loss ))

View File

@ -1,24 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn(4, device=dev)
t1 = torch.randn(4, device=dev)
t2 = torch.randn(4, device=dev)
t4 = t0 + t1 + t2
t5 = t4 + t1
t6 = t5 + t4
# CHECK-LABEL: test_multi_out
# CHECK: return %2, %3, %4 : tensor<4xf32>, tensor<4xf32>, tensor<4xf32>
mlir = torch_mlir.get_mlir([t4, t5, t6])
print ("test_multi_out")
print (mlir)

View File

@ -1,25 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import torchvision.models as models
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
model = models.resnet18().to(dev)
model.training = False
tensor = torch.randn(32,3,32,32).to(dev)
result = model(tensor)
mlir = torch_mlir.get_mlir(result)
# For now we just check the output shape.
# CHECK-LABEL: test_export_resnet18
# CHECK: return %{{.*}} : tensor<32x1000xf32>
print("test_export_resnet18")
print(mlir)

View File

@ -1,24 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import torchvision.models as models
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
model = models.vgg11_bn().to(dev)
model.training = False
result = model(torch.ones(32,3,32,32).to(dev))
mlir = torch_mlir.get_mlir(result)
# For now we just check the output shape.
# CHECK-LABEL: test_export_vgg11
# CHECK: return %{{.*}} : tensor<32x1000xf32>
print("test_export_vgg11")
print(mlir)

View File

@ -1,27 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((4,4), device=dev)
t1 = torch.randn((4,4), device=dev)
t2 = t0 + t1
#
# Check the result tensor against the CPU
#
t0_cpu = t0.to('cpu')
t1_cpu = t1.to('cpu')
t2_cpu = t2.to('cpu')
print (t0_cpu, " +\n", t1_cpu, " =\n", t2_cpu)
# CHECK: PASS! add2 check
test.compare(t2, t0_cpu + t1_cpu, "add2")
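
`test.compare` is the project's own helper whose implementation is not shown in this diff; the following is only a hedged, CPU-only sketch of the kind of elementwise check and `PASS!`/`FAIL!` reporting the CHECK line above relies on (the name `compare_sketch` and its tolerances are assumptions, not the real API):

```python
import torch

def compare_sketch(result, expected, label, rtol=1e-5, atol=1e-8):
    # Hypothetical stand-in for npcomp.frontends.pytorch.test.compare: move both
    # operands to the CPU and emit the "PASS! <label> check" line FileCheck matches.
    ok = torch.allclose(result.to("cpu"), expected.to("cpu"), rtol=rtol, atol=atol)
    print(("PASS!" if ok else "FAIL!"), label, "check")
    return ok

# CPU-only usage mirroring the add2 test, without the mlir pseudo-device:
a, b = torch.randn(4, 4), torch.randn(4, 4)
compare_sketch(a + b, b + a, "add2")
```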

View File

@ -1,29 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((1,2,3,4), device=dev)
t1 = torch.randn((1,2,3,4), device=dev)
t2 = torch.randn((1,2,3,4), device=dev)
t3 = t0 + t1 + t2
#
# Check the result tensor against the CPU
#
t0_cpu = t0.to('cpu')
t1_cpu = t1.to('cpu')
t2_cpu = t2.to('cpu')
t3_cpu = t3.to('cpu')
print (t0_cpu, " +\n", t1_cpu, " +\n", t2_cpu, " =\n", t3_cpu)
# CHECK: PASS!
test.compare(t3, t0_cpu + t1_cpu + t2_cpu, "add3")

View File

@ -1,42 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((4,16,4), device=dev)
t1 = torch.randn((4,16,4), device=dev)
t3 = torch.randn((4,64), device=dev)
t4 = torch.randn((4,64), device=dev)
t2 = t0 + t1
t5 = t3 + t4
t6 = t5.view((4,4,4,4))
t7 = t2.view((4,4,4,4))
t8 = t6 + t7
t0_cpu = t0.to('cpu')
t1_cpu = t1.to('cpu')
# CHECK: PASS! add_views_0 check
test.compare(t2, t0_cpu + t1_cpu, "add_views_0")
t3_cpu = t3.to('cpu')
t4_cpu = t4.to('cpu')
# CHECK: PASS! add_views_1 check
test.compare(t5, t3_cpu + t4_cpu, "add_views_1")
t6_cpu = t6.to('cpu')
t7_cpu = t7.to('cpu')
# CHECK: PASS! add_views_2 check
test.compare(t8, t6_cpu + t7_cpu, "add_views_2")
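
Both `view` calls above rely on the element counts matching: `(4, 16, 4)` and `(4, 64)` each hold 256 elements, so either can be reinterpreted as `(4, 4, 4, 4)` without copying. A quick CPU-only check of that invariant:

```python
import torch

# (4, 16, 4) and (4, 64) both contain 256 elements, so each contiguous tensor
# can be viewed as (4, 4, 4, 4) without a copy.
a = torch.randn(4, 16, 4)
b = torch.randn(4, 64)
assert a.numel() == b.numel() == 256
print(a.view(4, 4, 4, 4).shape, b.view(4, 4, 4, 4).shape)
```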

View File

@ -1,43 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
x = torch.rand((3,64,8,8), device=dev)
y = x*x
print(y.stride())
# dim = [64,24,24]  # larger configuration, left unused
dim = [4,4,4]
N = 2
count = dim[0]*dim[1]*dim[2]
sizes = (N,dim[0],dim[1],dim[2])
strides = (1,dim[1]*dim[2],dim[2],1)
print(count)
t0 = torch.randn((N,count), device=dev)
t0_like = torch.randn((N,count))
t1 = t0.as_strided(sizes, strides)
t1_ref = t0.to('cpu').as_strided(sizes, strides)
t1_like = t0_like.as_strided(sizes, strides)
t1_ref = t1_ref.clone()
# check that the IR has recorded the
# stride properly before invoking JIT
# CHECK: PASS! stride check
test.compare_eq(t1.stride(), t1_like.stride(), "stride")
# CHECK: PASS! as_stride check
test.compare(t1_ref, t1, "as_stride")
# CHECK: PASS! as_stride stride check
test.compare_eq(t1_ref.stride(), t1.to("cpu").stride(), "as_stride stride")
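
With the sizes and strides above, `as_strided` maps logical index `(n, i, j, k)` to flat storage offset `n*1 + i*16 + j*4 + k`. A small CPU-only sketch of that mapping, independent of the mlir device and the JIT:

```python
import torch

# Same sizes/strides as the test above, CPU only.
N, dim = 2, [4, 4, 4]
count = dim[0] * dim[1] * dim[2]           # 64 elements per row of t0
sizes = (N, dim[0], dim[1], dim[2])        # (2, 4, 4, 4)
strides = (1, dim[1] * dim[2], dim[2], 1)  # (1, 16, 4, 1)

t0 = torch.randn(N, count)
t1 = t0.as_strided(sizes, strides)

# The view records exactly the requested strides...
assert t1.stride() == strides
# ...and element (n, i, j, k) aliases flat offset n + 16*i + 4*j + k of t0's storage.
n, i, j, k = 1, 2, 3, 0
assert t1[n, i, j, k] == t0.flatten()[n + 16 * i + 4 * j + k]
print("as_strided offset check OK")
```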

View File

@ -1,17 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
model = torch.nn.Conv2d(2,16,7,stride=[2,2], padding=[3,3],
dilation=1, groups=1, bias=True)
tensor = torch.randn((1,2,128,128))
# CHECK: PASS! fwd check
test.check_ref(model, tensor)

View File

@ -1,46 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import torch.nn as nn
import torch.nn.functional as F
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
N = 3
Cin = 16
Cout = 4
w = 10
h = 10
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(Cin, Cout, (3,3))
def forward(self, x):
x = self.conv1(x)
output = F.log_softmax(x, dim=1)
return output
model = Net()
tensor = torch.randn(N, Cin, h, w)
# CHECK: PASS! fwd check
fwd_path = test.check_ref(model, tensor)
loss = torch.nn.NLLLoss()
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, Cout)
# CHECK: PASS! back check
test.check_back(fwd_path, target, loss)
# CHECK: PASS! weight_grad check
test.compare(model.conv1.weight.grad, fwd_path[0].conv1.weight.grad, "weight_grad")
# CHECK: PASS! bias_grad check
test.compare(model.conv1.bias.grad, fwd_path[0].conv1.bias.grad, "bias_grad")

View File

@ -1,60 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1, bias=True)
self.conv2 = nn.Conv2d(32, 64, 3, 1, bias=True)
#self.maxpool2d = nn.MaxPool2d(2,2)
self.fc1 = nn.Linear(9216*4, 128, bias=True)
self.fc2 = nn.Linear(128, 10, bias=True)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
#x = self.maxpool2d(x)
x = x.view((64,9216*4))
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
def main():
model = Net()
tensor = torch.randn((64, 1, 28, 28), requires_grad=True)
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)
target = torch.ones((64), dtype=torch.long)
loss = F.nll_loss
# CHECK: PASS! back check
test.check_back(fwd_path, target, loss)
# CHECK: PASS! weight_grad check
test.compare(model.conv2.weight.grad,
fwd_path[0].conv2.weight.grad, "weight_grad")
# CHECK: PASS! bias_grad check
test.compare(model.conv2.bias.grad,
fwd_path[0].conv2.bias.grad, "bias_grad")
# CHECK: PASS! fc1_weight_grad check
test.compare(model.fc1.weight.grad,
fwd_path[0].fc1.weight.grad, "fc1_weight_grad")
if __name__ == '__main__':
main()

View File

@ -1,53 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.maxpool2d = nn.MaxPool2d(2,2)
#self.dropout1 = nn.Dropout2d(0.25)
#self.dropout2 = nn.Dropout2d(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = self.maxpool2d(x)
#x = self.dropout1(x)
x = x.view((4,9216))
x = self.fc1(x)
x = F.relu(x)
#x = self.dropout2(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
def main():
model = Net()
tensor = torch.randn((4, 1, 28, 28))
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)
if __name__ == '__main__':
main()
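
The hard-coded `x.view((4, 9216))` above follows from the layer shapes: two unpadded 3x3 convolutions take 28 to 26 to 24, the 2x2 max-pool halves that to 12, and 64 * 12 * 12 = 9216 features per example. A minimal CPU-only sketch of that shape arithmetic (the shape-preserving ReLUs are omitted):

```python
import torch
import torch.nn.functional as F

# Shape arithmetic behind view((4, 9216)): 28 -> 26 -> 24 -> 12 spatially.
x = torch.randn(4, 1, 28, 28)
x = torch.nn.Conv2d(1, 32, 3, 1)(x)   # (4, 32, 26, 26)
x = torch.nn.Conv2d(32, 64, 3, 1)(x)  # (4, 64, 24, 24)
x = F.max_pool2d(x, 2)                # (4, 64, 12, 12)
print(x.flatten(1).shape)             # torch.Size([4, 9216])
```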

View File

@ -1,17 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
model = torch.nn.Linear(1024,16).to(dev)
tensor = torch.randn(4,1024).to(dev)
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)

View File

@ -1,15 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
model = torch.nn.LogSoftmax(dim=0)
tensor = torch.ones(1,2,3,4)
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)

View File

@ -1,18 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
model = torch.nn.MaxPool2d(kernel_size=(3,3), stride=(2,2), padding=(1,1),
dilation=1, return_indices=False, ceil_mode=False)
tensor = torch.randn(1,32,16,16)
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)

View File

@ -1,49 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(28*28, 50)
self.fc2 = nn.Linear(50, 50)
self.fc3 = nn.Linear(50, 10)
def forward(self, x):
x = x.view(-1, 28*28)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return F.log_softmax(self.fc3(x), dim=1)
def main():
device = torch_mlir.mlir_device()
model = Net()
    tensor = torch.randn((64, 1, 28, 28), requires_grad=True)
# CHECK: PASS! fwd check
fwd_path = test.check_ref(model, tensor)
target = torch.ones((64), dtype=torch.long)
loss = F.nll_loss
# CHECK: PASS! back check
test.check_back(fwd_path, target, loss)
# CHECK: PASS! fc1_weight_grad check
test.compare(model.fc1.weight.grad, fwd_path[0].fc1.weight.grad, "fc1_weight_grad")
if __name__ == '__main__':
main()

View File

@ -1,32 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((3,13), device=dev)
t1 = torch.randn((13,5), device=dev)
print(t0.to('cpu'), t1.to('cpu'))
print(torch.mm(t0.to('cpu'), t1.to('cpu')))
t2 = torch.mm(t0, t1)
#
# Check the result tensor against the CPU
#
t0_cpu = t0.to('cpu')
t1_cpu = t1.to('cpu')
t2_cpu = t2.to('cpu')
print (t0_cpu, " *\n", t1_cpu, " =\n", t2_cpu)
ref_tensor = torch.mm(t0_cpu, t1_cpu)
# CHECK: PASS! mm check
test.compare(t2, ref_tensor, "mm")

View File

@ -1,26 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
t0 = torch.randn((4,4), device=dev)
t1 = torch.randn((4,4), device=dev)
t2 = t0 * t1
#
# Check the result tensor against the CPU
#
t0_cpu = t0.to('cpu')
t1_cpu = t1.to('cpu')
t2_cpu = t2.to('cpu')
print (t0_cpu, " *\n", t1_cpu, " =\n", t2_cpu)
# CHECK: PASS! mul2 check
test.compare(t2, t0_cpu * t1_cpu, "mul2")

View File

@ -1,21 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
model = torch.nn.LogSoftmax(dim=1)
tensor = torch.randn(3,5,requires_grad=True)
# CHECK: PASS! fwd check
fwd_path = test.check_fwd(model, tensor)
target = torch.tensor([1, 0, 4])
loss = torch.nn.NLLLoss()
# CHECK: PASS! back check
test.check_back(fwd_path, target, loss)

View File

@ -1,15 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
model = torch.nn.ReLU()
tensor = torch.randn(10)
# CHECK: PASS! fwd check
fwd_path = test.check_ref(model, tensor)

View File

@ -1,18 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
import npcomp.frontends.pytorch.test as test
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
tensor = torch.randn(2,3).to(dev)
result = tensor.t()
ref_result = tensor.to('cpu').t()
# CHECK: PASS! transpose check
test.compare(ref_result, result, "transpose")

View File

@ -1,31 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import torch
import npcomp.frontends.pytorch as torch_mlir
# RUN: %PYTHON %s | FileCheck %s
dev = torch_mlir.mlir_device()
model = torch.nn.Conv2d(2,16,7,stride=[2,2], padding=[3,3], dilation=1, groups=1, bias=True).to(dev)
tensor = torch.randn((1,2,128,128), device=dev)
result = model(tensor)
mlir = torch_mlir.get_mlir(result)
report = torch_mlir.op_report(mlir)
# CHECK-LABEL: "L0-convolution_overrideable-0"
# CHECK-NEXT: "activation_in": 32768
# CHECK-NEXT: "activation_out": 65536
# CHECK-NEXT: "ops:+": 65536
# CHECK-NEXT: "ops:MAC": 6422528
# CHECK-NEXT: "parameters_in": 1584
# CHECK-NEXT: "reads": 34352
# CHECK-NEXT: "writes": 65536
for k,v in report.items():
print("\"{}\"".format(k))
for k,v in v.items():
print("\"{}\": {}".format(k,v))

View File

@ -1,107 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import npcomp.frontends.pytorch as torch_mlir
import json
# RUN: %PYTHON %s | FileCheck %s
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 8, 3, padding=1)
self.conv2 = nn.Conv2d(8, 16, 3, padding=0)
self.maxpool1 = nn.MaxPool2d(2,2)
self.maxpool2 = nn.MaxPool2d(2,2)
self.fc1 = nn.Linear(576, 128)
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, 8)
def forward(self, x):
x = self.conv1(x)
print(x.shape)
x = F.relu(x)
print(x.shape)
x = self.maxpool1(x)
print(x.shape)
x = self.conv2(x)
print(x.shape)
x = F.relu(x)
print(x.shape)
x = self.maxpool2(x)
print(x.shape)
x = x.view(8, 6*6*16)
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
x = F.relu(x)
x = self.fc3(x)
output = F.log_softmax(x, dim=1)
return output
def main():
test_status = "PASS!"
# CHECK-LABEL: test_op_report_vgg_style_lenet
# CHECK: PASS!
print("test_op_report_vgg_style_lenet")
device = torch_mlir.mlir_device()
model = Net().to(device)
ref_tensor = torch.randn((8, 1, 30, 30))
tensor = ref_tensor.clone().to(device)
result = model(tensor)
target = torch.ones((8), dtype=torch.long).to(device)
loss = F.nll_loss(result, target)
loss.backward()
mlir0 = torch_mlir.get_mlir(model.conv1.weight.grad)
print(mlir0)
report = torch_mlir.op_report(mlir0)
print(report)
report_dict = report
expected = 32
    if len(report_dict) != expected:
        print("### ERROR: Expecting", expected, "items in the report, but got", len(report_dict))
test_status = "FAIL!"
# Every item should have a read and a write
for key, value in report_dict.items():
if not 'reads' in value:
print(f"### ERROR: {key} does not contain the required reads field")
test_status = "FAIL!"
if not 'writes' in value:
print(f"### ERROR: {key} does not contain the required writes field")
test_status = "FAIL!"
if "convolution" in key:
if not 'ops:MAC' in value:
print(f"### ERROR: convolution {key} does not contain the required MAC field")
test_status = "FAIL!"
if "mm" in key:
if not 'ops:MAC' in value:
print(f"### ERROR: mm {key} does not contain the required MAC field")
test_status = "FAIL!"
print(test_status)
if __name__ == '__main__':
main()

View File

@ -1,2 +0,0 @@
if not config.enable_c10_dispatch:
config.unsupported = True

View File

@ -34,7 +34,6 @@ config.llvm_host_triple = '@LLVM_HOST_TRIPLE@'
config.host_arch = "@HOST_ARCH@"
config.npcomp_src_root = "@CMAKE_SOURCE_DIR@"
config.npcomp_obj_root = "@CMAKE_BINARY_DIR@"
config.enable_c10_dispatch = not @NPCOMP_ENABLE_TORCH_TYPE_DISPATCH@
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.