mirror of https://github.com/llvm/torch-mlir
Delete old PyTorch 1.3 type dispatch oriented code paths.
* We aren't quite at e2e parity, but we aren't going back and the old path is bit-rotted. (pull/115/head)

parent e359167562
commit 47ac80491c
@@ -1,48 +0,0 @@
FROM nvcr.io/nvidia/pytorch:19.10-py3
MAINTAINER Stephen Neuendorffer <stephenn@xilinx.com>

#
# get the basics
#
USER root
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update
RUN apt-get install software-properties-common --assume-yes
RUN apt-get install wget curl unzip libxml2-dev --assume-yes
RUN apt-get install autoconf libtool g++ g++-multilib --assume-yes
RUN apt-get install build-essential python3 cmake git gitk --assume-yes
RUN apt-get install clang-8 lld-8 ninja-build --assume-yes
RUN apt-get install libncurses5-dev --assume-yes

RUN /opt/conda/bin/conda install matplotlib pybind11
#torchvision

ENV LD_LIBRARY_PATH "${LD_LIBRARY_PATH}:/opt/conda/lib"
# Rebuild pytorch

WORKDIR /opt/pytorch/pytorch

# this is the recommended rebuild command from NVIDIA
# with the cleanup of the build area omitted.
RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5+PTX" \
    CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
    NCCL_INCLUDE_DIR="/usr/include/" \
    NCCL_LIB_DIR="/usr/lib/" \
    python setup.py install

WORKDIR /workspace

# Additional packages for building npcomp
RUN apt-get install clang-10 lld-10 --assume-yes
RUN conda install -c gaiar nnpack

# Make it possible to symbolize stack traces in crashes.
RUN ln -s /usr/bin/llvm-symbolizer-10 /usr/bin/llvm-symbolizer

# Additional env for building npcomp and running tests.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda-10.1/compat/lib.real"
ENV CC=clang-10
ENV CXX=clang++-10
ENV CXXFLAGS "-I/opt/conda/include"
ENV LDFLAGS "-fuse-ld=/usr/bin/ld.lld-10 -L/opt/conda/lib"
@@ -1,60 +0,0 @@
# Deprecated PyTorch 1.3 based build

These instructions are retained for the transition. Refer to the top-level README for up-to-date instructions.

### PyTorch 1.3 - ATen pseudo-device type dispatch

The currently functional approach to PyTorch integration uses an ATen pseudo
device for program capture. It is activated by including the PyTorch cmake
path and setting `-DNPCOMP_ENABLE_TORCH_TYPE_DISPATCH=ON`. This approach has a
very fragile dependency on a specific PyTorch revision in the ~1.3 era and
currently must be built via the docker image in `docker/pytorch-1.3`.

We are migrating to newer approaches that build with more recent PyTorch
versions, but these are not yet functional (see below).

Docker container setup:

```shell
# One of the maintainers does periodically push new images. To use one of these,
# skip the build step and use:
# BUILD_IMAGE_TAG="stellaraccident/npcomp:build-pytorch-1.3"
# Since we are not planning to support this branch long term, this process is
# entirely ad-hoc at present and geared for project maintainers and build bots
# to be able to make progress.
# See https://hub.docker.com/repository/docker/stellaraccident/npcomp
BUILD_IMAGE_TAG="local/npcomp:build-pytorch-1.3"

# Build the docker image (rebuilds PyTorch, so takes quite some time).
docker build docker/pytorch-1.3 --tag $BUILD_IMAGE_TAG

# Docker workflow (or use your own preferences).
# Create a volume for npcomp build artifacts.
docker volume create npcomp-pytorch-1.3-build

# Run the container, mounting /npcomp to the source directory and the volume
# above to the /build directory. The source directory is mounted read-only to
# avoid the container putting root-owned files there.
# Replace `$HOME/src/mlir-npcomp` with an appropriate path to where the project
# is checked out.
docker run \
  --mount type=bind,source=$HOME/src/mlir-npcomp,target=/npcomp,readonly \
  --mount source=npcomp-pytorch-1.3-build,target=/build \
  --rm -it $BUILD_IMAGE_TAG /bin/bash
```

```shell
# From within the docker image.
# Install MLIR and configure the project.
cd /npcomp
BUILD_DIR=/build ./build_tools/install_mlir.sh
BUILD_DIR=/build ./build_tools/cmake_configure.sh \
  -DCMAKE_PREFIX_PATH=/opt/conda/lib/python3.6/site-packages/torch/share/cmake \
  -DNPCOMP_ENABLE_TORCH_TYPE_DISPATCH=ON

# Build.
cd /build
ninja
ninja check-npcomp
ninja check-frontends-pytorch
```
@@ -1,24 +1,7 @@
#-------------------------------------------------------------------------------
# Options and settings
#-------------------------------------------------------------------------------

option(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH "Enables the legacy ATen Type dispatch code path" OFF)
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  add_compile_definitions(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  message(STATUS "Legacy Torch type dispatch mechanism enabled")
endif()

#-------------------------------------------------------------------------------
# Subdirectories
#-------------------------------------------------------------------------------

# TODO: This sub-directory does not need to be gated on the type dispatch
# mechanism, but it presently has some dependencies on an older pytorch version
# and is being excluded until those can be resolved.
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  add_subdirectory(lib)
endif()

add_subdirectory(csrc)
add_subdirectory(python)
add_subdirectory(test)
@@ -1,8 +1,4 @@
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  add_subdirectory(type_dispatch)
else()
  add_subdirectory(c10_dispatch)
endif()
add_subdirectory(builder)

include(NpcompPython)
@@ -16,25 +12,14 @@ include_directories(
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")

set(torch_mlir_optional_libraries)
if(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  list(APPEND torch_mlir_optional_libraries
    npcomp_torch_type_dispatch_bindings
  )
else()
  list(APPEND torch_mlir_optional_libraries
    npcomp_torch_c10_dispatch_bindings
  )
endif()

add_library(NPCOMPTorchMLIRExt SHARED
  init_python_bindings.cpp
)
target_link_libraries(NPCOMPTorchMLIRExt
  ${TORCH_LIBRARIES}
  ${PYTHON_LIBRARIES}
  ${torch_mlir_optional_libraries}
  torch_python
  npcomp_torch_builder_bindings

  # NPCOMP shared library.
  NPCOMP
@@ -7,7 +7,7 @@ include_directories(
  ${PYTHON_INCLUDE_DIRS}
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")
add_library(npcomp_torch_c10_dispatch_bindings
add_library(npcomp_torch_builder_bindings
  acap_dispatch.cpp
  debug.cpp
  func_builder.cpp
@@ -15,7 +15,7 @@ add_library(npcomp_torch_c10_dispatch_bindings
  python_bindings.cpp
)

target_link_libraries(npcomp_torch_c10_dispatch_bindings
target_link_libraries(npcomp_torch_builder_bindings
  ${TORCH_LIBRARIES}
  ${PYTHON_LIBRARIES}
  torch_python
@@ -11,8 +8,8 @@
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_ACAP_DISPATCH_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_ACAP_DISPATCH_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_ACAP_DISPATCH_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_ACAP_DISPATCH_H

#include <list>
#include <memory>
@@ -5,6 +5,9 @@
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H

#include <string>

namespace torch_mlir {

@@ -20,3 +23,5 @@ void debugTrace(const std::string &message);
void enableDebugTraceToStderr();

} // namespace torch_mlir

#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_DEBUG_H
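The two helpers visible in this hunk are small enough that a usage sketch may help orient readers; this is illustrative only and simply calls the functions declared above:

```cpp
#include "debug.h"

// Illustrative only: route trace output to stderr, then emit a message.
void traceExample() {
  torch_mlir::enableDebugTraceToStderr();
  torch_mlir::debugTrace("capturing forward pass");
}
```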
@@ -5,8 +5,8 @@
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_FUNC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_FUNC_BUILDER_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H

#include "mlir-c/IR.h"
#include "llvm/ADT/DenseMap.h"

@@ -169,4 +169,4 @@ private:

} // namespace torch_mlir

#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#endif // NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_FUNC_BUILDER_H
@@ -5,8 +5,8 @@
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_C10_DISPATCH_MODULE_BUILDER_H
#ifndef NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_H
#define NPCOMP_FRONTENDS_PYTORCH_CSRC_BUILDER_H

#include "../pybind.h"
@@ -126,7 +126,9 @@ py::list GetRegisteredOps() {
  return results;
}

void InitModuleBindings(py::module &m) {
} // namespace

void torch_mlir::InitBuilderBindings(py::module &m) {
  m.def("debug_trace_to_stderr", &enableDebugTraceToStderr);

  py::class_<AcapController, std::shared_ptr<AcapController>>(m,

@@ -139,8 +141,3 @@ void InitModuleBindings(py::module &m) {
  ModuleBuilder::bind(m);
}

} // namespace

void torch_mlir::InitC10DispatchBindings(py::module &m) {
  InitModuleBindings(m);
}
@@ -148,12 +148,7 @@ void InitModuleBindings(py::module &m) {

void InitBindings(py::module &m) {
  InitModuleBindings(m);

#if defined(NPCOMP_ENABLE_TORCH_TYPE_DISPATCH)
  InitTypeDispatchBindings(m);
#else
  InitC10DispatchBindings(m);
#endif
  InitBuilderBindings(m);
}

} // namespace torch_mlir
@@ -15,13 +15,8 @@ namespace torch_mlir {
// Perform top-level initialization for the module.
void InitBindings(pybind11::module &m);

// Adds bindings related to the type-dispatch program capture mechanism.
// Only defined if NPCOMP_ENABLE_TORCH_TYPE_DISPATCH (optional feature).
void InitTypeDispatchBindings(pybind11::module &m);

// Adds bindings related to the c10-dispatch program capture mechanism.
// Only defined if !NPCOMP_ENABLE_TORCH_TYPE_DISPATCH (default).
void InitC10DispatchBindings(pybind11::module &m);
// Adds bindings related to building modules.
void InitBuilderBindings(pybind11::module &m);

} // namespace torch_mlir
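For orientation, a minimal sketch of how a pybind11 extension module might call the entry point declared above; the module name `_torch_mlir` is an assumption for illustration and is not taken from this commit:

```cpp
#include <pybind11/pybind11.h>
#include "pybind.h"

// Hypothetical module definition: the shared library (NPCOMPTorchMLIRExt,
// built from init_python_bindings.cpp above) exposes one init hook that calls
// InitBindings, which in turn registers the builder bindings.
PYBIND11_MODULE(_torch_mlir, m) { torch_mlir::InitBindings(m); }
```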
@@ -1,31 +0,0 @@
include_directories(
  ${TORCH_INCLUDE_DIRS}
  ${TORCH_INSTALL_PREFIX}/include/TH
  ${TORCH_INSTALL_PREFIX}/include/THC/opt/pytorch/pytorch
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_BINARY_DIR}
  ${PYTHON_INCLUDE_DIRS}
)
link_directories("${TORCH_INSTALL_PREFIX}/lib")
add_library(npcomp_torch_type_dispatch_bindings
  aten_mlir_bridge.cpp
  aten_mlir_type.cpp
  aten_mlir_type_default.cpp
  device.cpp
  ir.cpp
  jit.cpp
  mlir_gen.cpp
  python_bindings.cpp
  tensor.cpp
  tensor_impl.cpp
  torch_util.cpp
)

get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS)
target_link_libraries(npcomp_torch_type_dispatch_bindings
  NPCOMPATenDialect
  ${TORCH_LIBRARIES}
  ${mlir_libs}
  ${PYTHON_LIBRARIES}
  torch_python
)
@@ -1,5 +0,0 @@
# Type dispatch MLIR capture interface

This directory contains code related to the legacy ATen "type dispatch"
interface (which uses a large table of virtual functions). It is being
superseded by the c10 dispatcher mechanism.
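To make the contrast concrete, here is a rough sketch (not code from this repository) of the two registration styles. In the legacy path, every captured op is a method on a Type-style class, as in `aten_mlir_type.h` below; with the c10 dispatcher, a kernel is registered against a dispatch key at load time. The `TORCH_LIBRARY_IMPL` registration, the `XLA` key, and the `mlir_add` kernel shown here are illustrative assumptions.

```cpp
#include <ATen/ATen.h>
#include <torch/library.h>

// Legacy type dispatch (schematic): a static override on a Type-style class,
// wired in through a generated table of virtual functions.
//   at::Tensor ATenMLIRType::add(const at::Tensor &self,
//                                const at::Tensor &other, at::Scalar alpha);

// c10 dispatcher (schematic): register a kernel for a dispatch key instead.
at::Tensor mlir_add(const at::Tensor &self, const at::Tensor &other,
                    const at::Scalar &alpha) {
  // A real capture kernel would record the op into the MLIR function being
  // built and then redispatch; this stub only illustrates the registration.
  return self;
}

TORCH_LIBRARY_IMPL(aten, XLA, m) { m.impl("add.Tensor", mlir_add); }
```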
@@ -1,192 +0,0 @@
//===- aten_mlir_bridge.cpp -------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

// Structured similarly to code from git@github.com:pytorch/xla.git

#include "aten_mlir_bridge.h"

#include <string>
#include <vector>

#include "device.h"
#include "tensor_impl.h"

namespace torch_mlir {
namespace bridge {
namespace {

class AtenMLIRDeviceMapper {
public:
  static AtenMLIRDeviceMapper *Get();

  size_t GetDeviceOrdinal(const Device &device) const {
    auto it = devices_ordinals_.find(device);
    assert(it != devices_ordinals_.end());
    return it->second;
  }

  const Device &GetDeviceFromOrdinal(size_t ordinal) const {
    return devices_.at(ordinal);
  }

private:
  AtenMLIRDeviceMapper() {
    std::vector<std::string> local_devices{"mlir:0", "mlir:1", "mlir:2"};
    for (auto &device_str : local_devices) {
      devices_.emplace_back(device_str);
      devices_ordinals_[devices_.back()] = devices_.size() - 1;
    }
  }

  std::vector<Device> devices_;
  std::map<Device, size_t> devices_ordinals_;
};

AtenMLIRDeviceMapper *AtenMLIRDeviceMapper::Get() {
  static AtenMLIRDeviceMapper *device_mapper = new AtenMLIRDeviceMapper();
  return device_mapper;
}

} // namespace

c10::optional<MLIRTensor> TryGetMLIRTensor(const at::Tensor &tensor) {
  MLIRTensorImpl *impl =
      dynamic_cast<MLIRTensorImpl *>(tensor.unsafeGetTensorImpl());
  if (impl == nullptr) {
    return c10::nullopt;
  }
  return impl->tensor();
}

MLIRTensor GetMLIRTensor(const at::Tensor &tensor) {
  auto xtensor = TryGetMLIRTensor(tensor);
  assert(xtensor && "Input tensor is not an MLIR tensor");
  return *xtensor;
}

MLIRTensor GetOrCreateMLIRTensor(const at::Tensor &tensor,
                                 const Device &device) {
  if (!tensor.defined()) {
    return MLIRTensor();
  }
  auto xtensor = TryGetMLIRTensor(tensor);
  return xtensor ? *xtensor : MLIRTensor::Create(tensor, device);
}

std::vector<at::Tensor> MLIRCreateTensorList(const at::TensorList &tensors) {

  std::vector<at::Tensor> aten_device_tensors(tensors.size());
  std::vector<MLIRTensor> device_tensors;

  std::vector<bool> to_translate(tensors.size());

  for (size_t i = 0; i < tensors.size(); ++i) {
    const at::Tensor &tensor = tensors[i];
    if (tensor.defined()) {
      auto xtensor = TryGetMLIRTensor(tensor);
      if (xtensor) {
        to_translate[i] = true;
        device_tensors.push_back(*xtensor);
      } else {
        aten_device_tensors[i] = tensor;
      }
    }
  }

  for (size_t i = 0, defined_pos = 0; i < tensors.size(); ++i) {
    if (to_translate[i]) {
      aten_device_tensors[i] =
          std::move(device_tensors[defined_pos++].ToTensor());
    }
  }
  return aten_device_tensors;
}

c10::optional<Device> GetMLIRDevice(const at::TensorList &tensors) {
  for (const auto &tensor : tensors) {
    auto device = GetMLIRDevice(tensor);
    if (device) {
      return device;
    }
  }
  return c10::nullopt;
}

c10::optional<Device> GetMLIRDevice(const at::TensorOptions &tensor_options) {
  if (!tensor_options.has_device()) {
    return c10::nullopt;
  }
  return GetMLIRDevice(tensor_options.device());
}

c10::optional<Device> GetMLIRDevice(const c10::Device &device) {
  if (device.type() != at::kXLA) {
    return c10::nullopt;
  }
  return AtenDeviceToMLIRDevice(device);
}

c10::optional<Device> GetMLIRDevice(const at::Tensor &tensor) {
  auto xtensor = TryGetMLIRTensor(tensor);
  if (!xtensor) {
    return c10::nullopt;
  }
  return xtensor->GetDevice();
}

Device AtenDeviceToMLIRDevice(const c10::Device &device) {
  assert(device.type() == at::kXLA);
  int ordinal = device.has_index() ? device.index() : -1;
  if (ordinal < 0) {
    c10::Device current_device = MLIRTensorImpl::GetCurrentAtenDevice();
    if (current_device.has_index()) {
      ordinal = current_device.index();
    }
  }
  if (ordinal < 0) {
    return *GetDefaultDevice();
  }
  return AtenMLIRDeviceMapper::Get()->GetDeviceFromOrdinal(ordinal);
}

c10::Device MLIRDeviceToAtenDevice(const Device &device) {
  // TODO: define our own device and stop hijacking the xla device.
  return c10::Device(at::kXLA,
                     AtenMLIRDeviceMapper::Get()->GetDeviceOrdinal(device));
}

at::Tensor MLIRToAtenTensor(MLIRTensor device_tensor,
                            const at::TensorOptions &tensor_options) {
  if (tensor_options.has_device()) {
    assert(tensor_options.device().type() != at::kXLA);
  }

  at::Tensor tensor = device_tensor.ToTensor();

  // We need to copy the tensor since it is cached within the MLIRTensor, and
  // returning it directly might expose it to in place changes.
  return tensor.to(tensor_options, /*non_blocking=*/false, /*copy=*/true);
}

at::Tensor AtenFromMLIRTensor(MLIRTensor device_tensor) {
  assert(!device_tensor.is_null());
  at::Tensor ret =
      at::Tensor(c10::make_intrusive<MLIRTensorImpl>(std::move(device_tensor)));
  return ret;
}

at::Tensor CreateMLIRTensor(at::Tensor tensor,
                            const c10::optional<Device> &device) {
  if (tensor.defined() && device) {
    MLIRTensor device_tensor = MLIRTensor::Create(std::move(tensor), *device);
    tensor = AtenFromMLIRTensor(device_tensor);
  }
  return tensor;
}

} // namespace bridge
} // namespace torch_mlir
@@ -1,61 +0,0 @@
//===- aten_mlir_bridge.h ---------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

#pragma once

// Structured similarly to code from git@github.com:pytorch/xla.git

// This file implements a bridge which moves data back and forth from torch
// tensors (at::Tensor) to MLIRTensor, which represents a tensor associated
// with our virtual 'MLIR' device.

#include "device.h"
#include "tensor.h"

#include <ATen/Device.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>

namespace torch_mlir {
namespace bridge {

c10::optional<MLIRTensor> TryGetMLIRTensor(const at::Tensor &tensor);

// Return an MLIR tensor that is computed the same way as the given at::Tensor
MLIRTensor GetMLIRTensor(const at::Tensor &tensor);

MLIRTensor GetOrCreateMLIRTensor(const at::Tensor &tensor,
                                 const Device &device);

// Creates a vector of at::Tensor objects extracted from a list of MLIR tensors.
std::vector<at::Tensor> MLIRCreateTensorList(const at::TensorList &tensors);

c10::optional<Device> GetMLIRDevice(const at::TensorList &tensors);

c10::optional<Device> GetMLIRDevice(const at::TensorOptions &tensor_options);

c10::optional<Device> GetMLIRDevice(const c10::Device &device);

c10::optional<Device> GetMLIRDevice(const at::Tensor &tensor);

Device AtenDeviceToMLIRDevice(const c10::Device &device);

c10::Device MLIRDeviceToAtenDevice(const Device &device);

at::Tensor MLIRToAtenTensor(MLIRTensor device_tensor,
                            const at::TensorOptions &tensor_options);

// Create an Aten tensor with MLIR type id from MLIRTensor
at::Tensor AtenFromMLIRTensor(MLIRTensor device_tensor);

// Creates an MLIR tensor holding the data in tensor, on the given device.
at::Tensor CreateMLIRTensor(at::Tensor tensor,
                            const c10::optional<Device> &device);

} // namespace bridge

} // namespace torch_mlir
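As a rough sketch of the round trip this header describes (illustrative only; it assumes `GetDefaultDevice()` from the sibling `device.h`, which is included above):

```cpp
#include <ATen/ATen.h>
#include "aten_mlir_bridge.h"

// Illustrative round trip between an ordinary ATen tensor and the MLIR
// pseudo-device tensor wrapper declared above.
void roundTrip(const at::Tensor &cpu_tensor) {
  using namespace torch_mlir;
  // Wrap the tensor so it is backed by an MLIRTensor on the default device.
  at::Tensor device_tensor =
      bridge::CreateMLIRTensor(cpu_tensor, *GetDefaultDevice());
  // ... ops on device_tensor are captured by the ATenMLIRType overrides ...
  // Unwrap back to a plain ATen tensor with the original options.
  at::Tensor result = bridge::MLIRToAtenTensor(
      bridge::GetMLIRTensor(device_tensor), cpu_tensor.options());
  (void)result;
}
```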
@@ -1,669 +0,0 @@
//===- aten_mlir_type.cpp ---------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

// Structured similarly to code from git@github.com:pytorch/xla.git

#include "llvm/Support/Debug.h"

#include "aten_mlir_bridge.h"
#include "aten_mlir_type.h"
#include "aten_mlir_type_default.h"
#include "ir.h"
#include "tensor_impl.h"
#include "torch_util.h"

#include <mutex>

#define DEBUG_TYPE "torch_mlir"

namespace torch_mlir {
namespace {

struct MLIROptions {
  MLIROptions(const at::TensorOptions &options,
              c10::optional<Device> device_opt = c10::nullopt,
              c10::optional<at::ScalarType> scalar_type_opt = c10::nullopt)
      : device(std::move(device_opt)), scalar_type(std::move(scalar_type_opt)) {
    if (options.has_device()) {
      device = bridge::AtenDeviceToMLIRDevice(options.device());
    }
    if (options.has_dtype()) {
      scalar_type = c10::typeMetaToScalarType(options.dtype());
    }
  }

  Device get_device() const { return device ? *device : *GetDefaultDevice(); }

  at::ScalarType
  get_scalar_type(at::ScalarType defval = at::ScalarType::Float) const {
    return scalar_type ? *scalar_type : defval;
  }

  c10::optional<Device> device;
  c10::optional<at::ScalarType> scalar_type;
};

std::tuple<MLIRTensor, MLIRTensor>
GetPromotedMLIRTensorsForBinaryOp(const at::Tensor &self,
                                  const at::Tensor &other) {
  // this requires slightly newer than pytorch 1.3.0, disable for now.
  // at::ScalarType dtype = at::result_type(self, other);
  MLIRTensor tensor1 = bridge::GetMLIRTensor(self);
  MLIRTensor tensor2 =
      bridge::GetOrCreateMLIRTensor(other, tensor1.GetDevice());
  // tensor1.SetScalarType(dtype);
  // tensor2.SetScalarType(dtype);
  return std::make_tuple(tensor1, tensor2);
}

void AtenInitialize() {
  RegisterAtenTypeFunctions();
  ir::RegisterAtenIR();
}

} // namespace

void ATenMLIRType::InitializeAtenBindings() {
  static std::once_flag once;
  std::call_once(once, []() { AtenInitialize(); });
}

at::Tensor ATenMLIRType::_adaptive_avg_pool2d(const at::Tensor &self,
                                              at::IntArrayRef output_size) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(MLIRTensor::_adaptive_avg_pool2d(
      bridge::GetMLIRTensor(self), output_size));
}

at::Tensor
ATenMLIRType::_adaptive_avg_pool2d_backward(const at::Tensor &grad_output,
                                            const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());

  return bridge::AtenFromMLIRTensor(MLIRTensor::_adaptive_avg_pool2d_backward(
      grad_output_tensor, input_tensor));
}

at::Tensor ATenMLIRType::add(const at::Tensor &self, const at::Tensor &other,
                             at::Scalar alpha) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::add(std::get<0>(tensors), std::get<1>(tensors), alpha));
}

at::Tensor &ATenMLIRType::add_(at::Tensor &self, const at::Tensor &other,
                               at::Scalar alpha) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::add_(std::get<0>(tensors), std::get<1>(tensors), alpha));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

at::Tensor ATenMLIRType::addmm(const at::Tensor &self, const at::Tensor &mat1,
                               const at::Tensor &mat2, at::Scalar beta,
                               at::Scalar alpha) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensor = bridge::GetMLIRTensor(self);
  return bridge::AtenFromMLIRTensor(MLIRTensor::addmm(
      tensor, bridge::GetOrCreateMLIRTensor(mat1, tensor.GetDevice()),
      bridge::GetOrCreateMLIRTensor(mat2, tensor.GetDevice()), beta, alpha));
}

at::Tensor ATenMLIRType::as_strided(const at::Tensor &self,
                                    at::IntArrayRef size,
                                    at::IntArrayRef stride,
                                    c10::optional<int64_t> storage_offset) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(MLIRTensor::as_strided(
      bridge::GetMLIRTensor(self), size, stride, storage_offset));
}

at::Tensor ATenMLIRType::clone(const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");

  return bridge::AtenFromMLIRTensor(
      MLIRTensor::clone(bridge::GetMLIRTensor(self)));
}

at::Tensor &ATenMLIRType::copy_(at::Tensor &self, const at::Tensor &src,
                                bool non_blocking) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");

  auto self_tensor = bridge::TryGetMLIRTensor(self);
  auto src_tensor = bridge::TryGetMLIRTensor(src);

  if (!src_tensor) {
    assert(self_tensor);
    self_tensor->SetTensor(util::CopyTensor(src, self.scalar_type()));
  } else if (!self_tensor) {
    at::Tensor t = src_tensor->ToTensor();
    const_cast<at::Tensor &>(self).unsafeGetTensorImpl()->shallow_copy_from(
        t.getIntrusivePtr());
  } else {
    MLIRTensor::copy_(*self_tensor, *src_tensor);
  }
  return self;
}

at::Tensor ATenMLIRType::_copy_from(const at::Tensor &self,
                                    const at::Tensor &dst, bool non_blocking) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");

  std::vector<at::Tensor> tensors = {self};
  auto device_tensors = bridge::MLIRCreateTensorList(tensors);
  // Hack in an overwrite of a const tensor.
  at::Tensor t = util::CopyTensor(device_tensors.front(), dst.scalar_type());
  const_cast<at::Tensor &>(dst).unsafeGetTensorImpl()->shallow_copy_from(
      t.getIntrusivePtr());
  return dst;
}

std::tuple<at::Tensor, at::Tensor, at::Tensor>
ATenMLIRType::convolution_backward_overrideable(
    const at::Tensor &grad_output, const at::Tensor &input,
    const at::Tensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
    at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
    int64_t groups, std::array<bool, 3> output_mask) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(input);
  auto weight_tensor =
      bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());

  auto ret = MLIRTensor::convolution_backward(
      grad_output_tensor, input_tensor, weight_tensor, stride, padding,
      dilation, transposed, output_padding, groups, output_mask);
  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}

at::Tensor ATenMLIRType::convolution_overrideable(
    const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
    at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
    bool transposed, at::IntArrayRef output_padding, int64_t groups) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(input);
  auto weight_tensor =
      bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());

  auto bias_tensor =
      bias.defined()
          ? bridge::GetOrCreateMLIRTensor(bias, input_tensor.GetDevice())
          : bridge::GetOrCreateMLIRTensor(
                at::zeros(at::IntArrayRef{weight.sizes()[0]}),
                input_tensor.GetDevice());

  return bridge::AtenFromMLIRTensor(MLIRTensor::convolution(
      input_tensor, weight_tensor, bias_tensor, stride, padding, dilation,
      transposed, output_padding, groups));
}

at::Tensor ATenMLIRType::div(const at::Tensor &self, at::Scalar other) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  return bridge::AtenFromMLIRTensor(MLIRTensor::div(input_tensor, other));
}

at::Tensor ATenMLIRType::div(const at::Tensor &self, const at::Tensor &other) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::div(std::get<0>(tensors), std::get<1>(tensors)));
}

at::Tensor &ATenMLIRType::div_(at::Tensor &self, const at::Tensor &other) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::div_(std::get<0>(tensors), std::get<1>(tensors)));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

at::Tensor ATenMLIRType::expand(const at::Tensor &self, at::IntArrayRef size,
                                bool implicit) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::expand(input_tensor, size, implicit));
}

at::Tensor ATenMLIRType::gather(const at::Tensor &self, int64_t dim,
                                const at::Tensor &index, bool sparse_grad) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto index_tensor =
      bridge::GetOrCreateMLIRTensor(index, input_tensor.GetDevice());
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::gather(input_tensor, dim, index_tensor, sparse_grad));
}

at::Tensor ATenMLIRType::hardtanh(const at::Tensor &self, at::Scalar min_val,
                                  at::Scalar max_val) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::hardtanh(input_tensor, min_val, max_val));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

at::Tensor &ATenMLIRType::hardtanh_(at::Tensor &self, at::Scalar min_val,
                                    at::Scalar max_val) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::hardtanh_(input_tensor, min_val, max_val));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

at::Tensor ATenMLIRType::hardtanh_backward(const at::Tensor &grad_output,
                                           const at::Tensor &self,
                                           at::Scalar min_val,
                                           at::Scalar max_val) {
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  return bridge::AtenFromMLIRTensor(MLIRTensor::hardtanh_backward(
      grad_output_tensor, input_tensor, min_val, max_val));
}

at::Tensor ATenMLIRType::_log_softmax(const at::Tensor &self, int64_t dim,
                                      bool half_to_float) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::_log_softmax(input_tensor, dim, half_to_float));
}

at::Tensor
ATenMLIRType::_log_softmax_backward_data(const at::Tensor &grad_output,
                                         const at::Tensor &output, int64_t dim,
                                         const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto output_tensor =
      bridge::GetOrCreateMLIRTensor(output, input_tensor.GetDevice());
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  return bridge::AtenFromMLIRTensor(MLIRTensor::_log_softmax_backward_data(
      grad_output_tensor, output_tensor, dim, input_tensor));
}

std::tuple<at::Tensor, at::Tensor> ATenMLIRType::max_pool2d_with_indices(
    const at::Tensor &self, at::IntArrayRef kernel_size, at::IntArrayRef stride,
    at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto ret = MLIRTensor::max_pool2d_with_indices(
      input_tensor, kernel_size, stride, padding, dilation, ceil_mode);
  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}

at::Tensor ATenMLIRType::max_pool2d_with_indices_backward(
    const at::Tensor &grad_output, const at::Tensor &self,
    at::IntArrayRef kernel_size, at::IntArrayRef stride,
    at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
    const at::Tensor &indices) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  auto indices_tensor =
      bridge::GetOrCreateMLIRTensor(indices, input_tensor.GetDevice());

  return bridge::AtenFromMLIRTensor(
      MLIRTensor::max_pool2d_with_indices_backward(
          grad_output_tensor, input_tensor, kernel_size, stride, padding,
          dilation, ceil_mode, indices_tensor));
}

at::Tensor ATenMLIRType::mean(const at::Tensor &self,
                              c10::optional<at::ScalarType> dtype) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::mean(bridge::GetMLIRTensor(self), dtype));
}

at::Tensor ATenMLIRType::mean(const at::Tensor &self, at::IntArrayRef dim,
                              bool keepdim,
                              c10::optional<at::ScalarType> dtype) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::mean(bridge::GetMLIRTensor(self), dim, keepdim, dtype));
}

at::Tensor ATenMLIRType::mm(const at::Tensor &input, const at::Tensor &mat2) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(input);
  auto mat2_tensor =
      bridge::GetOrCreateMLIRTensor(mat2, input_tensor.GetDevice());
  return bridge::AtenFromMLIRTensor(MLIRTensor::mm(input_tensor, mat2_tensor));
}

at::Tensor ATenMLIRType::mul(const at::Tensor &self, const at::Tensor &other) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::mul(std::get<0>(tensors), std::get<1>(tensors)));
}

at::Tensor &ATenMLIRType::mul_(at::Tensor &self, const at::Tensor &other) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::mul_(std::get<0>(tensors), std::get<1>(tensors)));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> ATenMLIRType::native_batch_norm(
    const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
    const at::Tensor &running_mean, const at::Tensor &running_var,
    bool training, double momentum, double eps) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(input);
  auto weight_tensor =
      bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
  auto bias_tensor =
      bridge::GetOrCreateMLIRTensor(bias, input_tensor.GetDevice());
  auto running_mean_tensor =
      bridge::GetOrCreateMLIRTensor(running_mean, input_tensor.GetDevice());
  auto running_var_tensor =
      bridge::GetOrCreateMLIRTensor(running_var, input_tensor.GetDevice());

  auto ret = MLIRTensor::native_batch_norm(
      input_tensor, weight_tensor, bias_tensor, running_mean_tensor,
      running_var_tensor, training, momentum, eps);

  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}

std::tuple<at::Tensor, at::Tensor, at::Tensor>
ATenMLIRType::native_batch_norm_backward(
    const at::Tensor &grad_out, const at::Tensor &input,
    const at::Tensor &weight, const at::Tensor &running_mean,
    const at::Tensor &running_var, const at::Tensor &save_mean,
    const at::Tensor &save_invstd, bool train, double eps,
    std::array<bool, 3> output_mask) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(input);
  auto grad_out_tensor =
      bridge::GetOrCreateMLIRTensor(grad_out, input_tensor.GetDevice());
  auto weight_tensor =
      bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice());
  auto running_mean_tensor =
      bridge::GetOrCreateMLIRTensor(running_mean, input_tensor.GetDevice());
  auto running_var_tensor =
      bridge::GetOrCreateMLIRTensor(running_var, input_tensor.GetDevice());
  auto save_mean_tensor =
      bridge::GetOrCreateMLIRTensor(save_mean, input_tensor.GetDevice());
  auto save_invstd_tensor =
      bridge::GetOrCreateMLIRTensor(save_invstd, input_tensor.GetDevice());

  auto ret = MLIRTensor::native_batch_norm_backward(
      grad_out_tensor, input_tensor, weight_tensor, running_mean_tensor,
      running_var_tensor, save_mean_tensor, save_invstd_tensor, train, eps,
      output_mask);

  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<2>(ret)));
}

at::Tensor ATenMLIRType::neg(const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  return bridge::AtenFromMLIRTensor(MLIRTensor::neg(input_tensor));
}

std::tuple<at::Tensor, at::Tensor> ATenMLIRType::nll_loss2d_forward(
    const at::Tensor &self, const at::Tensor &target, const at::Tensor &weight,
    int64_t reduction, int64_t ignore_index) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto target_tensor =
      bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());

  auto weight_tensor =
      weight.defined()
          ? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
          : bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
                                          input_tensor.GetDevice());

  auto ret = MLIRTensor::nll_loss2d_forward(
      input_tensor, target_tensor, weight_tensor, reduction, ignore_index);

  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}

at::Tensor ATenMLIRType::nll_loss2d_backward(
    const at::Tensor &grad_output, const at::Tensor &self,
    const at::Tensor &target, const at::Tensor &weight, int64_t reduction,
    int64_t ignore_index, const at::Tensor &total_weight) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  auto target_tensor =
      bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());

  auto weight_tensor =
      weight.defined()
          ? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
          : bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
                                          input_tensor.GetDevice());
  auto total_weight_tensor =
      bridge::GetOrCreateMLIRTensor(total_weight, input_tensor.GetDevice());

  return bridge::AtenFromMLIRTensor(MLIRTensor::nll_loss2d_backward(
      grad_output_tensor, input_tensor, target_tensor, weight_tensor, reduction,
      ignore_index, total_weight_tensor));
}

std::tuple<at::Tensor, at::Tensor>
ATenMLIRType::nll_loss_forward(const at::Tensor &self, const at::Tensor &target,
                               const at::Tensor &weight, int64_t reduction,
                               int64_t ignore_index) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto target_tensor =
      bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());

  auto weight_tensor =
      weight.defined()
          ? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
          : bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
                                          input_tensor.GetDevice());

  auto ret = MLIRTensor::nll_loss_forward(
      input_tensor, target_tensor, weight_tensor, reduction, ignore_index);

  return std::make_tuple(bridge::AtenFromMLIRTensor(std::get<0>(ret)),
                         bridge::AtenFromMLIRTensor(std::get<1>(ret)));
}

at::Tensor ATenMLIRType::nll_loss_backward(
    const at::Tensor &grad_output, const at::Tensor &self,
    const at::Tensor &target, const at::Tensor &weight, int64_t reduction,
    int64_t ignore_index, const at::Tensor &total_weight) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  auto target_tensor =
      bridge::GetOrCreateMLIRTensor(target, input_tensor.GetDevice());

  auto weight_tensor =
      weight.defined()
          ? bridge::GetOrCreateMLIRTensor(weight, input_tensor.GetDevice())
          : bridge::GetOrCreateMLIRTensor(at::ones(self.sizes()[1]),
                                          input_tensor.GetDevice());
  auto total_weight_tensor =
      bridge::GetOrCreateMLIRTensor(total_weight, input_tensor.GetDevice());

  return bridge::AtenFromMLIRTensor(MLIRTensor::nll_loss_backward(
      grad_output_tensor, input_tensor, target_tensor, weight_tensor, reduction,
      ignore_index, total_weight_tensor));
}

at::Tensor ATenMLIRType::relu(const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::relu(bridge::GetMLIRTensor(self)));
}

at::Tensor &ATenMLIRType::relu_(at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto result = bridge::AtenFromMLIRTensor(MLIRTensor::relu_(input_tensor));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

int64_t ATenMLIRType::size(const at::Tensor &self, int64_t dim) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::GetMLIRTensor(self).sizes()[dim];
}

at::Tensor ATenMLIRType::squeeze(const at::Tensor &self, int64_t dim) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::squeeze(bridge::GetMLIRTensor(self), dim));
}

at::Tensor ATenMLIRType::sub(const at::Tensor &self, const at::Tensor &other,
                             at::Scalar alpha) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::sub(std::get<0>(tensors), std::get<1>(tensors), alpha));
}

at::Tensor &ATenMLIRType::sub_(at::Tensor &self, const at::Tensor &other,
                               at::Scalar alpha) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto tensors = GetPromotedMLIRTensorsForBinaryOp(self, other);
  auto result = bridge::AtenFromMLIRTensor(
      MLIRTensor::sub_(std::get<0>(tensors), std::get<1>(tensors), alpha));
  MLIRTensorImpl *self_impl =
      dynamic_cast<MLIRTensorImpl *>(self.unsafeGetTensorImpl());
  self_impl->shallow_copy_from(result.getIntrusivePtr());
  return self;
}

at::Tensor ATenMLIRType::sum(const at::Tensor &self, at::IntArrayRef dim,
                             bool keepdim,
                             c10::optional<at::ScalarType> dtype) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::sum(bridge::GetMLIRTensor(self), dim, keepdim, dtype));
}

at::Tensor ATenMLIRType::t(const at::Tensor &self) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(MLIRTensor::t(bridge::GetMLIRTensor(self)));
}

at::Tensor ATenMLIRType::threshold_backward(const at::Tensor &grad_output,
                                            const at::Tensor &self,
                                            at::Scalar threshold) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  auto input_tensor = bridge::GetMLIRTensor(self);
  auto grad_output_tensor =
      bridge::GetOrCreateMLIRTensor(grad_output, input_tensor.GetDevice());
  return bridge::AtenFromMLIRTensor(MLIRTensor::threshold_backward(
      grad_output_tensor, input_tensor, threshold));
}

at::Tensor ATenMLIRType::to(const at::Tensor &self,
                            const at::TensorOptions &options,
                            bool /* non_blocking */, bool /* copy */) {

  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");

  auto self_tensor = bridge::TryGetMLIRTensor(self);
  if (!self_tensor) {
    assert(options.has_device());
    at::ScalarType dtype = options.has_dtype()
                               ? c10::typeMetaToScalarType(options.dtype())
                               : self.scalar_type();
    MLIRTensor xtensor =
        MLIRTensor::Create(util::CopyTensor(self, dtype),
                           bridge::AtenDeviceToMLIRDevice(options.device()));
    return bridge::AtenFromMLIRTensor(xtensor);
  }
  if (options.has_device() && options.device().type() != at::kXLA) {
    return bridge::MLIRToAtenTensor(*self_tensor, options);
  }
  MLIROptions mlir_options(options, self_tensor->GetDevice(),
                           self_tensor->dtype());
  return bridge::AtenFromMLIRTensor(MLIRTensor::to(
      *self_tensor, mlir_options.device, mlir_options.scalar_type));
}

at::Tensor ATenMLIRType::to(const at::Tensor &self, c10::Device device,
                            at::ScalarType dtype, bool non_blocking,
                            bool copy) {
  return to(self, self.options().device(device).dtype(dtype), non_blocking,
            copy);
}

at::Tensor ATenMLIRType::to(const at::Tensor &self, at::ScalarType dtype,
                            bool non_blocking, bool copy) {
  return to(self, self.options().dtype(dtype), non_blocking, copy);
}

at::Tensor ATenMLIRType::to(const at::Tensor &self, const at::Tensor &other,
                            bool non_blocking, bool copy) {
  return to(self, other.options(), non_blocking, copy);
}

at::Tensor ATenMLIRType::_unsafe_view(const at::Tensor &self,
                                      at::IntArrayRef size) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::view(bridge::GetMLIRTensor(self), size));
}

at::Tensor ATenMLIRType::unsqueeze(const at::Tensor &self, int64_t dim) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::unsqueeze(bridge::GetMLIRTensor(self), dim));
}

at::Tensor ATenMLIRType::view(const at::Tensor &self, at::IntArrayRef size) {
  LLVM_DEBUG(llvm::dbgs() << "ATenMLIRType::" << __func__ << "\n");
  return bridge::AtenFromMLIRTensor(
      MLIRTensor::view(bridge::GetMLIRTensor(self), size));
}
} // namespace torch_mlir
@@ -1,212 +0,0 @@
//===- aten_mlir_type.h -----------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

// Structured similarly to code from git@github.com:pytorch/xla.git

#pragma once

#include <ATen/Tensor.h>

namespace torch_mlir {

// Base ATEN Type class where the MLIR specific overrides should be defined.
class ATenMLIRType {
public:
  static void InitializeAtenBindings();

  //////////////////////////////////////////////////////////////////////////////
  // ATEN API overrides in alphabetical order.
  // Note: The C++ signatures must match the ones listed within the following
  // pytorch folder file:
  //   build/aten/src/ATen/RegistrationDeclarations.h
  /////////////////////////////////////////////////////////////////////////////
  // The static method definitions here have multiple uses. Each function
  // signature here will override the default implementation provided by
  // aten_mlir_type_defaults.h. Most of these overrides are used to construct
  // a small internal IR that can be used for different purposes. Primarily,
  // in this code, the IR will be converted to MLIR. As such there is often a
  // 1:1 correspondence between code here and operations in the ATen MLIR
  // dialect.

  // This file is parsed by gen_aten_dialect.py to generate
  // aten_mlir_type_defaults.*, including the appropriate bindings in that
  // file for all pytorch methods.

  static at::Tensor _adaptive_avg_pool2d(const at::Tensor &self,
                                         at::IntArrayRef output_size);

  static at::Tensor _adaptive_avg_pool2d_backward(const at::Tensor &grad_output,
                                                  const at::Tensor &self);

  static at::Tensor add(const at::Tensor &self, const at::Tensor &other,
                        at::Scalar alpha);

  static at::Tensor &add_(at::Tensor &self, const at::Tensor &other,
                          at::Scalar alpha);

  static at::Tensor addmm(const at::Tensor &self, const at::Tensor &mat1,
                          const at::Tensor &mat2, at::Scalar beta,
                          at::Scalar alpha);

  static at::Tensor as_strided(const at::Tensor &self, at::IntArrayRef size,
                               at::IntArrayRef stride,
                               c10::optional<int64_t> storage_offset);

  static at::Tensor clone(const at::Tensor &self);

  static std::tuple<at::Tensor, at::Tensor, at::Tensor>
  convolution_backward_overrideable(
      const at::Tensor &grad_output, const at::Tensor &input,
      const at::Tensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
      at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
      int64_t groups, std::array<bool, 3> output_mask);

  static at::Tensor convolution_overrideable(
      const at::Tensor &input, const at::Tensor &weight, const at::Tensor &bias,
      at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
      bool transposed, at::IntArrayRef output_padding, int64_t groups);

  static at::Tensor &copy_(at::Tensor &self, const at::Tensor &src,
                           bool non_blocking);

  static at::Tensor _copy_from(const at::Tensor &self, const at::Tensor &dst,
                               bool non_blocking);

  static at::Tensor div(const at::Tensor &self, const at::Tensor &other);

  static at::Tensor &div_(at::Tensor &self, const at::Tensor &other);

  static at::Tensor div(const at::Tensor &self, at::Scalar other);

  static at::Tensor expand(const at::Tensor &self, at::IntArrayRef size,
                           bool implicit);

  static at::Tensor gather(const at::Tensor &self, int64_t dim,
                           const at::Tensor &index, bool sparse_grad);

  static at::Tensor hardtanh(const at::Tensor &self, at::Scalar min_val,
                             at::Scalar max_val);

  static at::Tensor &hardtanh_(at::Tensor &self, at::Scalar min_val,
                               at::Scalar max_val);

  static at::Tensor hardtanh_backward(const at::Tensor &grad_output,
                                      const at::Tensor &self,
                                      at::Scalar min_val, at::Scalar max_val);

  static at::Tensor _log_softmax(const at::Tensor &self, int64_t dim,
                                 bool half_to_float);

  static at::Tensor _log_softmax_backward_data(const at::Tensor &grad_output,
                                               const at::Tensor &output,
                                               int64_t dim,
                                               const at::Tensor &self);

  static std::tuple<at::Tensor, at::Tensor>
  max_pool2d_with_indices(const at::Tensor &self, at::IntArrayRef kernel_size,
                          at::IntArrayRef stride, at::IntArrayRef padding,
                          at::IntArrayRef dilation, bool ceil_mode);

  static at::Tensor max_pool2d_with_indices_backward(
      const at::Tensor &grad_output, const at::Tensor &self,
      at::IntArrayRef kernel_size, at::IntArrayRef stride,
      at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
      const at::Tensor &indices);

  static at::Tensor mean(const at::Tensor &self,
                         c10::optional<at::ScalarType> dtype);

  static at::Tensor mean(const at::Tensor &self, at::IntArrayRef dim,
                         bool keepdim, c10::optional<at::ScalarType> dtype);

  static at::Tensor mm(const at::Tensor &self, const at::Tensor &mat2);

  static at::Tensor mul(const at::Tensor &self, const at::Tensor &other);

  static at::Tensor &mul_(at::Tensor &self, const at::Tensor &other);

  static std::tuple<at::Tensor, at::Tensor, at::Tensor>
  native_batch_norm(const at::Tensor &input, const at::Tensor &weight,
                    const at::Tensor &bias, const at::Tensor &running_mean,
                    const at::Tensor &running_var, bool training,
                    double momentum, double eps);

  static std::tuple<at::Tensor, at::Tensor, at::Tensor>
  native_batch_norm_backward(const at::Tensor &grad_out,
                             const at::Tensor &input, const at::Tensor &weight,
                             const at::Tensor &running_mean,
                             const at::Tensor &running_var,
                             const at::Tensor &save_mean,
                             const at::Tensor &save_invstd, bool train,
                             double eps, std::array<bool, 3> output_mask);

  static at::Tensor neg(const at::Tensor &self);

  static std::tuple<at::Tensor, at::Tensor>
  nll_loss2d_forward(const at::Tensor &self, const at::Tensor &target,
                     const at::Tensor &weight, int64_t reduction,
                     int64_t ignore_index);

  static at::Tensor nll_loss2d_backward(const at::Tensor &grad_output,
                                        const at::Tensor &self,
                                        const at::Tensor &target,
                                        const at::Tensor &weight,
                                        int64_t reduction, int64_t ignore_index,
                                        const at::Tensor &total_weight);

  static std::tuple<at::Tensor, at::Tensor>
  nll_loss_forward(const at::Tensor &self, const at::Tensor &target,
                   const at::Tensor &weight, int64_t reduction,
                   int64_t ignore_index);

  static at::Tensor nll_loss_backward(const at::Tensor &grad_output,
                                      const at::Tensor &self,
                                      const at::Tensor &target,
                                      const at::Tensor &weight,
                                      int64_t reduction, int64_t ignore_index,
                                      const at::Tensor &total_weight);

  static at::Tensor relu(const at::Tensor &self);

  static at::Tensor &relu_(at::Tensor &self);

  static int64_t size(const at::Tensor &self, int64_t dim);

  static at::Tensor squeeze(const at::Tensor &self, int64_t dim);

  static at::Tensor sub(const at::Tensor &self, const at::Tensor &other,
                        at::Scalar alpha);

  static at::Tensor &sub_(at::Tensor &self, const at::Tensor &other,
                          at::Scalar alpha);

  static at::Tensor sum(const at::Tensor &self, at::IntArrayRef dim,
                        bool keepdim, c10::optional<at::ScalarType> dtype);

  static at::Tensor t(const at::Tensor &self);

  static at::Tensor threshold_backward(const at::Tensor &grad_output,
                                       const at::Tensor &self,
                                       at::Scalar threshold);

  static at::Tensor to(const at::Tensor &self, const at::TensorOptions &options,
                       bool non_blocking, bool copy);
  static at::Tensor to(const at::Tensor &self, c10::Device device,
|
||||
at::ScalarType dtype, bool non_blocking, bool copy);
|
||||
static at::Tensor to(const at::Tensor &self, at::ScalarType dtype,
|
||||
bool non_blocking, bool copy);
|
||||
static at::Tensor to(const at::Tensor &self, const at::Tensor &other,
|
||||
bool non_blocking, bool copy);
|
||||
|
||||
static at::Tensor _unsafe_view(const at::Tensor &self, at::IntArrayRef size);
|
||||
|
||||
static at::Tensor unsqueeze(const at::Tensor &self, int64_t dim);
|
||||
|
||||
static at::Tensor view(const at::Tensor &self, at::IntArrayRef size);
|
||||
};
|
||||
|
||||
} // namespace torch_mlir
|
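As a rough illustration of the pattern these declarations imply (this sketch is not part of the original sources), each dispatched method records a node in the small internal IR instead of computing eagerly. `AddNode` and `ConstantNode` are the node classes declared in `ir.h` later in this change; `GetIrValue` and `MakeMLIRTensor` are assumed helper names used only for illustration:

```cpp
// Hypothetical sketch, assuming GetIrValue/MakeMLIRTensor helpers exist to
// move between at::Tensor and the internal ir::Value representation.
static at::Tensor add(const at::Tensor &self, const at::Tensor &other,
                      at::Scalar alpha) {
  ir::Value self_v = GetIrValue(self);   // assumed helper
  ir::Value other_v = GetIrValue(other); // assumed helper
  ir::Value alpha_v(std::make_shared<ir::ConstantNode>(alpha));
  auto node = std::make_shared<ir::AddNode>(self_v, other_v, alpha_v);
  return MakeMLIRTensor(ir::Value(node)); // assumed helper
}
```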
|
@ -1,67 +0,0 @@
|
|||
//===- device.cpp -----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Structured similarly to code from git@github.com:pytorch/xla.git
|
||||
|
||||
#include "device.h"
|
||||
|
||||
namespace torch_mlir {
|
||||
namespace {
|
||||
|
||||
std::string DeviceTypeToString(DeviceType hw_type) {
|
||||
switch (hw_type) {
|
||||
case DeviceType::CPU:
|
||||
return "CPU";
|
||||
case DeviceType::MLIR:
|
||||
return "MLIR";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
void ParseDevice(const std::string &device_spec, Device *device) {
|
||||
if (device_spec.empty()) {
|
||||
return ParseDevice(std::string("mlir:0"), device);
|
||||
}
|
||||
|
||||
if (device_spec[0] == ':') {
|
||||
return ParseDevice(std::string("mlir") + device_spec, device);
|
||||
}
|
||||
|
||||
auto pos = device_spec.find(':');
|
||||
auto devtype = device_spec.substr(0, pos);
|
||||
|
||||
// TODO: error-check the device spec before parsing the ordinal.
|
||||
|
||||
device->ordinal =
|
||||
std::stoi(device_spec.substr(pos + 1, device_spec.size() - pos - 1));
|
||||
if (devtype == "MLIR" || devtype == "mlir") {
|
||||
device->hw_type = DeviceType::MLIR;
|
||||
} else if (devtype == "CPU" || devtype == "cpu") {
|
||||
device->hw_type = DeviceType::CPU;
|
||||
} else {
|
||||
// TODO: report an error for unrecognized device types.
|
||||
device->hw_type = DeviceType::MLIR;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Device::Device(const std::string &device_spec) {
|
||||
ParseDevice(device_spec, this);
|
||||
}
|
||||
|
||||
std::string Device::ToString() const {
|
||||
return DeviceTypeToString(hw_type) + std::string(":") +
|
||||
std::to_string(ordinal);
|
||||
}
|
||||
|
||||
const Device *GetDefaultDevice() {
|
||||
static const Device *default_device = new Device("");
|
||||
return default_device;
|
||||
}
|
||||
|
||||
} // namespace torch_mlir
|
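A brief usage sketch (not from the original sources) of the device-spec format the parser above accepts:

```cpp
// Hypothetical usage of the device-spec parsing implemented above.
#include "device.h"
#include <iostream>

int main() {
  torch_mlir::Device d1("");      // empty spec defaults to "mlir:0"
  torch_mlir::Device d2(":1");    // bare ordinal expands to "mlir:1"
  torch_mlir::Device d3("CPU:2"); // explicit device type and ordinal
  std::cout << d1 << " " << d2 << " " << d3 << "\n"; // MLIR:0 MLIR:1 CPU:2
  return 0;
}
```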
|
@ -1,59 +0,0 @@
|
|||
//===- device.h -------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Structured similarly to code from git@github.com:pytorch/xla.git
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace torch_mlir {
|
||||
|
||||
enum class DeviceType { CPU, MLIR };
|
||||
|
||||
/// Model a pytorch device, which determines the location of a buffer in
|
||||
/// pytorch.
|
||||
struct Device {
|
||||
Device() = default;
|
||||
explicit Device(const std::string &device_spec);
|
||||
Device(DeviceType hw_type, int ordinal)
|
||||
: hw_type(hw_type), ordinal(ordinal) {}
|
||||
|
||||
bool operator==(const Device &other) const { return compare(other) == 0; }
|
||||
|
||||
bool operator!=(const Device &other) const { return compare(other) != 0; }
|
||||
|
||||
bool operator<(const Device &rhs) const { return compare(rhs) < 0; }
|
||||
|
||||
int compare(const Device &rhs) const {
|
||||
if (hw_type != rhs.hw_type) {
|
||||
return hw_type < rhs.hw_type ? -1 : +1;
|
||||
}
|
||||
return ordinal < rhs.ordinal ? -1 : (ordinal > rhs.ordinal ? +1 : 0);
|
||||
}
|
||||
|
||||
std::string ToString() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Device &device) {
|
||||
os << device.ToString();
|
||||
return os;
|
||||
}
|
||||
|
||||
size_t hash() const { return std::hash<std::string>{}(ToString()); }
|
||||
|
||||
DeviceType hw_type = DeviceType::CPU;
|
||||
int ordinal = 0;
|
||||
};
|
||||
|
||||
const Device *GetDefaultDevice();
|
||||
|
||||
static inline const Device &GetDeviceOrDefault(const Device *device) {
|
||||
return device != nullptr ? *device : *GetDefaultDevice();
|
||||
}
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,920 +0,0 @@
|
|||
//===- ir.h -----------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma once
|
||||
|
||||
// This file defines an intermediate IR generated from a pytorch model.
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace mlir {
|
||||
class OpBuilder;
|
||||
class Value;
|
||||
class Operation;
|
||||
class MLIRContext;
|
||||
} // namespace mlir
|
||||
|
||||
#include <array>
#include <map>
#include <memory>
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ATen/Tensor.h>
|
||||
#include <ATen/core/interned_strings.h>
|
||||
#include <c10/core/Scalar.h>
|
||||
#include <c10/util/ArrayRef.h>
|
||||
|
||||
namespace torch_mlir {
|
||||
namespace ir {
|
||||
|
||||
class Node;
|
||||
|
||||
void RegisterAtenIR();
|
||||
|
||||
using NodePtr = std::shared_ptr<Node>;
|
||||
|
||||
struct Value {
|
||||
Value() = default;
|
||||
Value(NodePtr node, size_t index = 0) : node(std::move(node)), index(index) {}
|
||||
|
||||
operator bool() const { return node != nullptr; }
|
||||
|
||||
bool operator==(const Value &rhs) const {
|
||||
return node == rhs.node && index == rhs.index;
|
||||
}
|
||||
|
||||
bool operator<(const Value &rhs) const {
|
||||
if (node == rhs.node)
|
||||
return index < rhs.index;
|
||||
return node < rhs.node;
|
||||
}
|
||||
|
||||
std::vector<int64_t> sizes() const;
|
||||
std::vector<int64_t> strides() const;
|
||||
|
||||
NodePtr node;
|
||||
size_t index = 0;
|
||||
};
|
||||
|
||||
struct OpKind {
|
||||
OpKind() = default;
|
||||
explicit OpKind(c10::Symbol op) : op(std::move(op)) {}
|
||||
|
||||
bool operator==(const OpKind &rhs) const { return op == rhs.op; }
|
||||
bool operator!=(const OpKind &rhs) const { return !operator==(rhs); }
|
||||
bool operator<(const OpKind &rhs) const {
|
||||
return c10::unique_t(op) < c10::unique_t(rhs.op);
|
||||
}
|
||||
|
||||
// size_t hash() const;
|
||||
|
||||
std::string ToString() const { return op.toQualString(); }
|
||||
|
||||
static OpKind Get(const std::string &name) {
|
||||
return OpKind(c10::Symbol::fromQualString(name));
|
||||
}
|
||||
|
||||
c10::Symbol op;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const OpKind &op) {
|
||||
stream << op.ToString();
|
||||
return stream;
|
||||
}
|
||||
|
||||
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &stream,
|
||||
const OpKind &op) {
|
||||
stream << op.ToString();
|
||||
return stream;
|
||||
}
|
||||
|
||||
using OpList = std::vector<Value>;
|
||||
|
||||
class Node {
|
||||
|
||||
public:
|
||||
Node(OpKind op);
|
||||
Node(OpKind op, OpList operands, std::vector<int64_t> sizes);
|
||||
Node(OpKind op, OpList operands, at::IntArrayRef sizes);
|
||||
|
||||
const OpKind &op() const { return op_; }
|
||||
|
||||
virtual std::vector<int64_t> sizes() const { return sizes_[0]; }
|
||||
virtual std::vector<int64_t> sizes(size_t i) const { return sizes_[0]; }
|
||||
|
||||
virtual std::vector<int64_t> strides() const { return strides(sizes()); }
|
||||
virtual std::vector<int64_t> strides(size_t i) const {
|
||||
return strides(sizes(i));
|
||||
}
|
||||
|
||||
OpList &operands() { return operands_; }
|
||||
Value operand(size_t i) const { return operands_.at(i); }
|
||||
|
||||
virtual mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable);
|
||||
|
||||
private:
|
||||
std::vector<int64_t> strides(std::vector<int64_t> sz) const;
|
||||
|
||||
OpKind op_;
|
||||
OpList operands_;
|
||||
std::array<std::vector<int64_t>, 3> sizes_;
|
||||
// std::array<std::vector<int64_t>, 3> strides_;
|
||||
};
|
||||
|
||||
class ConstantNode : public Node {
|
||||
public:
|
||||
ConstantNode(at::Scalar scalar)
|
||||
: Node(OpKind::Get("aten::constant")), scalar(scalar) {}
|
||||
|
||||
ConstantNode(at::IntArrayRef array)
|
||||
: Node(OpKind::Get("aten::constant")), array(array.begin(), array.end()) {
|
||||
}
|
||||
|
||||
ConstantNode(bool bool_)
|
||||
: Node(OpKind::Get("aten::constant")), bool_(bool_) {}
|
||||
|
||||
ConstantNode(int int_) : Node(OpKind::Get("aten::constant")), int_(int_) {}
|
||||
|
||||
ConstantNode(int64_t int_)
|
||||
: Node(OpKind::Get("aten::constant")), int_(int_) {}
|
||||
|
||||
ConstantNode(float float_)
|
||||
: Node(OpKind::Get("aten::constant")), float_(float_) {}
|
||||
|
||||
ConstantNode(double double_)
|
||||
: Node(OpKind::Get("aten::constant")), double_(double_) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override { return {1}; }
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
c10::optional<at::Scalar> scalar;
|
||||
std::vector<int64_t> array;
|
||||
c10::optional<bool> bool_;
|
||||
c10::optional<int> int_;
|
||||
c10::optional<float> float_;
|
||||
c10::optional<double> double_;
|
||||
};
|
||||
|
||||
class AdaptiveAvgPool2dNode : public Node {
|
||||
public:
|
||||
AdaptiveAvgPool2dNode(Value input, at::IntArrayRef kernel_size)
|
||||
: Node(OpKind::Get("aten::_adaptive_avg_pool2d"),
|
||||
OpList{input,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size))},
|
||||
std::vector<int64_t>{input.sizes()[0], input.sizes()[1],
|
||||
kernel_size[0], kernel_size[1]}) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class AdaptiveAvgPool2dBackwardNode : public Node {
|
||||
public:
|
||||
AdaptiveAvgPool2dBackwardNode(Value grad_output, Value self)
|
||||
: Node(OpKind::Get("aten::_adaptive_avg_pool2d_backward"),
|
||||
OpList{grad_output, self}, self.sizes()) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class AddNode : public Node {
|
||||
public:
|
||||
AddNode(Value rhs, Value lhs, Value alpha)
|
||||
: Node(OpKind::Get("aten::add"), OpList{rhs, lhs, alpha}, rhs.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class AddInPlaceNode : public Node {
|
||||
public:
|
||||
AddInPlaceNode(Value self, Value other, Value alpha)
|
||||
: Node(OpKind::Get("aten::add_"), OpList{self, other, alpha},
|
||||
self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class AddmmNode : public Node {
|
||||
public:
|
||||
AddmmNode(Value input, Value mat1, Value mat2, Value beta, Value alpha)
|
||||
: Node(OpKind::Get("aten::addmm"), OpList{input, mat1, mat2, beta, alpha},
|
||||
std::vector<int64_t>{mat1.sizes()[0], mat2.sizes()[1]}){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class AsStridedNode : public Node {
|
||||
public:
|
||||
AsStridedNode(Value input, at::IntArrayRef size, at::IntArrayRef stride,
|
||||
c10::optional<int64_t> storage_offset)
|
||||
: Node(OpKind::Get("aten::as_strided"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride))},
|
||||
input.sizes()),
|
||||
size(size.begin(), size.end()), stride(stride.begin(), stride.end()),
|
||||
storage_offset(storage_offset) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
std::vector<int64_t> strides() const override { return stride; }
|
||||
std::vector<int64_t> strides(size_t i) const override { return strides(); }
|
||||
|
||||
std::vector<int64_t> size;
|
||||
std::vector<int64_t> stride;
|
||||
c10::optional<int64_t> storage_offset;
|
||||
};
|
||||
|
||||
class BatchNormNode : public Node {
|
||||
public:
|
||||
BatchNormNode(Value input, Value weight, Value bias, Value running_mean,
|
||||
Value running_var, bool training, double momentum, double eps)
|
||||
: Node(OpKind::Get("aten::native_batch_norm"),
|
||||
OpList{
|
||||
input, weight, bias, running_mean, running_var,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(training)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>((float)momentum)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>((float)eps))},
|
||||
input.sizes()),
|
||||
training(training), momentum(momentum), eps(eps) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
bool training;
|
||||
double momentum;
|
||||
double eps;
|
||||
};
|
||||
|
||||
class BatchNormBackwardNode : public Node {
|
||||
public:
|
||||
BatchNormBackwardNode(Value grad_out, Value input, Value weight,
|
||||
Value running_mean, Value running_var, Value save_mean,
|
||||
Value save_invstd, bool train, double eps,
|
||||
std::array<bool, 3> output_mask)
|
||||
: Node(OpKind::Get("aten::native_batch_norm_backward"),
|
||||
OpList{grad_out, input, weight, running_mean, running_var,
|
||||
save_mean, save_invstd,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(train)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>((float)eps))},
|
||||
input.sizes()),
|
||||
train(train), eps(eps), output_mask(output_mask) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override {
|
||||
assert(0 && "Cannot call sizes() for multiple outputs");
|
||||
}
|
||||
std::vector<int64_t> sizes(size_t i) const override;
|
||||
|
||||
private:
|
||||
bool train;
|
||||
double eps;
|
||||
std::array<bool, 3> output_mask;
|
||||
};
|
||||
|
||||
class Conv2dNode : public Node {
|
||||
public:
|
||||
Conv2dNode(Value input, Value weight, Value bias, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed,
|
||||
at::IntArrayRef output_padding, int64_t groups)
|
||||
: Node(OpKind::Get("aten::_convolution"),
|
||||
OpList{
|
||||
input, weight, bias,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
|
||||
input.sizes()),
|
||||
stride(stride.begin(), stride.end()),
|
||||
padding(padding.begin(), padding.end()),
|
||||
dilation(dilation.begin(), dilation.end()), transposed(transposed),
|
||||
output_padding(output_padding.begin(), output_padding.end()),
|
||||
groups(groups), has_bias(true) {}
|
||||
|
||||
Conv2dNode(Value input, Value weight, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed,
|
||||
at::IntArrayRef output_padding, int64_t groups)
|
||||
: Node(OpKind::Get("aten::_convolution"),
|
||||
OpList{
|
||||
input, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
|
||||
input.sizes()),
|
||||
stride(stride.begin(), stride.end()),
|
||||
padding(padding.begin(), padding.end()),
|
||||
dilation(dilation.begin(), dilation.end()), transposed(transposed),
|
||||
output_padding(output_padding.begin(), output_padding.end()),
|
||||
groups(groups), has_bias(false) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> stride;
|
||||
std::vector<int64_t> padding;
|
||||
std::vector<int64_t> dilation;
|
||||
bool transposed;
|
||||
std::vector<int64_t> output_padding;
|
||||
int64_t groups;
|
||||
bool has_bias;
|
||||
};
|
||||
|
||||
class Conv2dBackwardNode : public Node {
|
||||
public:
|
||||
Conv2dBackwardNode(Value grad_output, Value input, Value weight,
|
||||
at::IntArrayRef stride, at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool transposed,
|
||||
at::IntArrayRef output_padding, int64_t groups)
|
||||
: Node(OpKind::Get("aten::_convolution_backward"),
|
||||
OpList{
|
||||
grad_output, input, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(transposed)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(output_padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(groups))},
|
||||
input.sizes()),
|
||||
stride(stride.begin(), stride.end()),
|
||||
padding(padding.begin(), padding.end()),
|
||||
dilation(dilation.begin(), dilation.end()), transposed(transposed),
|
||||
output_padding(output_padding.begin(), output_padding.end()),
|
||||
groups(groups) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override {
|
||||
assert(0 && "Cannot call sizes() for multiple outputs");
|
||||
}
|
||||
std::vector<int64_t> sizes(size_t i) const override;
|
||||
|
||||
private:
|
||||
std::vector<int64_t> stride;
|
||||
std::vector<int64_t> padding;
|
||||
std::vector<int64_t> dilation;
|
||||
bool transposed;
|
||||
std::vector<int64_t> output_padding;
|
||||
int64_t groups;
|
||||
};
|
||||
|
||||
class DivNode : public Node {
|
||||
public:
|
||||
DivNode(Value rhs, Value lhs)
|
||||
: Node(OpKind::Get("aten::div"), OpList{rhs, lhs}, rhs.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class DivInPlaceNode : public Node {
|
||||
public:
|
||||
DivInPlaceNode(Value self, Value other)
|
||||
: Node(OpKind::Get("aten::div_"), OpList{self, other}, self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class ExpandNode : public Node {
|
||||
public:
|
||||
ExpandNode(Value input, at::IntArrayRef size, bool implicit)
|
||||
: Node(OpKind::Get("aten::expand"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(implicit))},
|
||||
input.sizes()),
|
||||
output_size(size.begin(), size.end()), implicit(implicit) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override { return output_size; }
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> output_size;
|
||||
bool implicit;
|
||||
};
|
||||
|
||||
class GatherNode : public Node {
|
||||
public:
|
||||
GatherNode(Value input, int64_t dim, Value index, bool sparse_grad)
|
||||
: Node(OpKind::Get("aten::gather"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
|
||||
index,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(sparse_grad))},
|
||||
input.sizes()) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class HardtanhNode : public Node {
|
||||
public:
|
||||
HardtanhNode(Value self, Value min_val, Value max_val)
|
||||
: Node(OpKind::Get("aten::hardtanh"), OpList{self, min_val, max_val},
|
||||
self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class HardtanhInPlaceNode : public Node {
|
||||
public:
|
||||
HardtanhInPlaceNode(Value self, Value min_val, Value max_val)
|
||||
: Node(OpKind::Get("aten::hardtanh_"), OpList{self, min_val, max_val},
|
||||
self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class HardtanhBackwardNode : public Node {
|
||||
public:
|
||||
HardtanhBackwardNode(Value grad_output, Value self, Value min_val,
|
||||
Value max_val)
|
||||
: Node(OpKind::Get("aten::hardtanh_backward"),
|
||||
OpList{grad_output, self, min_val, max_val}, self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class LogSoftmaxNode : public Node {
|
||||
public:
|
||||
LogSoftmaxNode(Value input, int64_t dim, bool half_to_float)
|
||||
: Node(OpKind::Get("aten::_log_softmax"),
|
||||
OpList{
|
||||
input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(half_to_float))},
|
||||
input.sizes()),
|
||||
dim(dim), half_to_float(half_to_float) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t dim;
|
||||
bool half_to_float;
|
||||
};
|
||||
|
||||
class LogSoftmaxBackwardNode : public Node {
|
||||
public:
|
||||
LogSoftmaxBackwardNode(Value grad_output, Value output, int64_t dim,
|
||||
Value input)
|
||||
: Node(OpKind::Get("aten::_log_softmax_backward_data"),
|
||||
OpList{grad_output, output,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dim)), input},
|
||||
input.sizes()),
|
||||
dim(dim) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t dim;
|
||||
};
|
||||
|
||||
class MaxPool2dWithIndicesNode : public Node {
|
||||
public:
|
||||
MaxPool2dWithIndicesNode(Value input, at::IntArrayRef kernel_size,
|
||||
at::IntArrayRef stride, at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool ceil_mode)
|
||||
: Node(OpKind::Get("aten::max_pool2d_with_indices"),
|
||||
OpList{input,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ceil_mode))},
|
||||
input.sizes()),
|
||||
kernel_size(kernel_size.begin(), kernel_size.end()),
|
||||
stride(stride.begin(), stride.end()),
|
||||
padding(padding.begin(), padding.end()),
|
||||
dilation(dilation.begin(), dilation.end()), ceil_mode(ceil_mode){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override {
|
||||
assert(0 && "Cannot call sizes() for multiple outputs");
|
||||
}
|
||||
std::vector<int64_t> sizes(size_t i) const override;
|
||||
|
||||
private:
|
||||
std::vector<int64_t> kernel_size;
|
||||
std::vector<int64_t> stride;
|
||||
std::vector<int64_t> padding;
|
||||
std::vector<int64_t> dilation;
|
||||
bool ceil_mode;
|
||||
};
|
||||
|
||||
class MaxPool2dWithIndicesBackwardNode : public Node {
|
||||
public:
|
||||
MaxPool2dWithIndicesBackwardNode(Value grad_output, Value input,
|
||||
at::IntArrayRef kernel_size,
|
||||
at::IntArrayRef stride,
|
||||
at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool ceil_mode,
|
||||
Value indices)
|
||||
: Node(OpKind::Get("aten::max_pool2d_with_indices_backward"),
|
||||
OpList{grad_output, input,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(kernel_size)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(stride)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(padding)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(dilation)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ceil_mode)),
|
||||
indices},
|
||||
input.sizes()),
|
||||
kernel_size(kernel_size.begin(), kernel_size.end()),
|
||||
stride(stride.begin(), stride.end()),
|
||||
padding(padding.begin(), padding.end()),
|
||||
dilation(dilation.begin(), dilation.end()), ceil_mode(ceil_mode){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
std::vector<int64_t> kernel_size;
|
||||
std::vector<int64_t> stride;
|
||||
std::vector<int64_t> padding;
|
||||
std::vector<int64_t> dilation;
|
||||
bool ceil_mode;
|
||||
};
|
||||
|
||||
class MeanNode : public Node {
|
||||
public:
|
||||
MeanNode(Value input, at::IntArrayRef dim, bool keepdim,
|
||||
c10::optional<at::ScalarType> dtype)
|
||||
: Node(OpKind::Get("aten::mean"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(keepdim))},
|
||||
input.sizes()),
|
||||
dim(dim.begin(), dim.end()), keepdim(keepdim), dtype(dtype) {}
|
||||
|
||||
MeanNode(Value input, c10::optional<at::ScalarType> dtype)
|
||||
: Node(OpKind::Get("aten::mean"), OpList{input}, input.sizes()),
|
||||
dtype(dtype) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> dim;
|
||||
bool keepdim;
|
||||
c10::optional<at::ScalarType> dtype;
|
||||
};
|
||||
|
||||
class MMNode : public Node {
|
||||
public:
|
||||
MMNode(Value input, Value mat2)
|
||||
: Node(OpKind::Get("aten::mm"), OpList{input, mat2},
|
||||
std::vector<int64_t>{input.sizes()[0], mat2.sizes()[1]}){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class MulNode : public Node {
|
||||
public:
|
||||
MulNode(Value rhs, Value lhs)
|
||||
: Node(OpKind::Get("aten::mul"), OpList{rhs, lhs}, rhs.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class MulInPlaceNode : public Node {
|
||||
public:
|
||||
MulInPlaceNode(Value self, Value other)
|
||||
: Node(OpKind::Get("aten::mul_"), OpList{self, other}, self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class NegNode : public Node {
|
||||
public:
|
||||
NegNode(Value input)
|
||||
: Node(OpKind::Get("aten::neg"), OpList{input}, input.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class NllLoss2dForwardNode : public Node {
|
||||
public:
|
||||
NllLoss2dForwardNode(Value self, Value target, Value weight,
|
||||
int64_t reduction, int64_t ignore_index)
|
||||
: Node(
|
||||
OpKind::Get("aten::nll_loss2d_forward"),
|
||||
OpList{self, target, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index))},
|
||||
1 /*target.sizes()*/),
|
||||
reduction(reduction), ignore_index(ignore_index) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t reduction;
|
||||
int64_t ignore_index;
|
||||
};
|
||||
|
||||
class NllLoss2dBackwardNode : public Node {
|
||||
public:
|
||||
NllLoss2dBackwardNode(Value grad_output, Value self, Value target,
|
||||
Value weight, int64_t reduction, int64_t ignore_index,
|
||||
Value total_weight)
|
||||
: Node(OpKind::Get("aten::nll_loss2d_backward"),
|
||||
OpList{grad_output, self, target, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index)),
|
||||
total_weight},
|
||||
self.sizes()),
|
||||
reduction(reduction), ignore_index(ignore_index) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t reduction;
|
||||
int64_t ignore_index;
|
||||
};
|
||||
|
||||
class NllLossForwardNode : public Node {
|
||||
public:
|
||||
NllLossForwardNode(Value self, Value target, Value weight, int64_t reduction,
|
||||
int64_t ignore_index)
|
||||
: Node(
|
||||
OpKind::Get("aten::nll_loss_forward"),
|
||||
OpList{self, target, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index))},
|
||||
1 /*target.sizes()*/),
|
||||
reduction(reduction), ignore_index(ignore_index) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t reduction;
|
||||
int64_t ignore_index;
|
||||
};
|
||||
|
||||
class NllLossBackwardNode : public Node {
|
||||
public:
|
||||
NllLossBackwardNode(Value grad_output, Value self, Value target, Value weight,
|
||||
int64_t reduction, int64_t ignore_index,
|
||||
Value total_weight)
|
||||
: Node(OpKind::Get("aten::nll_loss_backward"),
|
||||
OpList{grad_output, self, target, weight,
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(reduction)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(ignore_index)),
|
||||
total_weight},
|
||||
self.sizes()),
|
||||
reduction(reduction), ignore_index(ignore_index) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t reduction;
|
||||
int64_t ignore_index;
|
||||
};
|
||||
|
||||
class SumNode : public Node {
|
||||
public:
|
||||
SumNode(Value input, at::IntArrayRef dim, bool keepdim,
|
||||
c10::optional<at::ScalarType> dtype)
|
||||
: Node(OpKind::Get("aten::sum"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(keepdim))},
|
||||
input.sizes()),
|
||||
dim(dim.begin(), dim.end()), keepdim(keepdim), dtype(dtype) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> dim;
|
||||
bool keepdim;
|
||||
c10::optional<at::ScalarType> dtype;
|
||||
};
|
||||
|
||||
class ReLUNode : public Node {
|
||||
public:
|
||||
ReLUNode(Value input)
|
||||
: Node(OpKind::Get("aten::relu"), OpList{input}, input.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class ReLUInPlaceNode : public Node {
|
||||
public:
|
||||
ReLUInPlaceNode(Value input)
|
||||
: Node(OpKind::Get("aten::relu_"), OpList{input}, input.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class ThresholdBackwardNode : public Node {
|
||||
public:
|
||||
ThresholdBackwardNode(Value grad_output, Value input, Value threshold)
|
||||
: Node(OpKind::Get("aten::threshold_backward"),
|
||||
OpList{grad_output, input, threshold}, input.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class TransposeNode : public Node {
|
||||
public:
|
||||
TransposeNode(Value input)
|
||||
: Node(OpKind::Get("aten::t"), OpList{input},
|
||||
std::vector<int64_t>{input.sizes()[1], input.sizes()[0]}){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class SizeNode : public Node {
|
||||
public:
|
||||
SizeNode(Value input, int64_t dim)
|
||||
: Node(OpKind::Get("aten::size"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
|
||||
1),
|
||||
dim(dim) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
private:
|
||||
int64_t dim;
|
||||
};
|
||||
|
||||
class SqueezeNode : public Node {
|
||||
public:
|
||||
SqueezeNode(Value input, int64_t dim)
|
||||
: Node(OpKind::Get("aten::squeeze"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
|
||||
input.sizes()),
|
||||
dim(dim) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
int64_t dim;
|
||||
};
|
||||
|
||||
class SubNode : public Node {
|
||||
public:
|
||||
SubNode(Value rhs, Value lhs, Value alpha)
|
||||
: Node(OpKind::Get("aten::sub"), OpList{rhs, lhs, alpha}, rhs.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class SubInPlaceNode : public Node {
|
||||
public:
|
||||
SubInPlaceNode(Value self, Value other, Value alpha)
|
||||
: Node(OpKind::Get("aten::sub_"), OpList{self, other, alpha},
|
||||
self.sizes()){};
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
};
|
||||
|
||||
class UnsqueezeNode : public Node {
|
||||
public:
|
||||
UnsqueezeNode(Value input, int64_t dim)
|
||||
: Node(OpKind::Get("aten::unsqueeze"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(dim))},
|
||||
input.sizes()),
|
||||
dim(dim) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
int64_t dim;
|
||||
};
|
||||
|
||||
class ViewNode : public Node {
|
||||
public:
|
||||
ViewNode(Value input, at::IntArrayRef size)
|
||||
: Node(OpKind::Get("aten::view"),
|
||||
OpList{input, ir::Value(std::make_shared<ir::ConstantNode>(size))},
|
||||
input.sizes()),
|
||||
view_size(size.begin(), size.end()) {}
|
||||
|
||||
mlir::Operation *
|
||||
genMLIR(std::unique_ptr<mlir::OpBuilder> &builder, mlir::MLIRContext &context,
|
||||
std::map<const ir::Value, mlir::Value> &symbolTable) override;
|
||||
|
||||
std::vector<int64_t> sizes() const override;
|
||||
std::vector<int64_t> sizes(size_t i) const override { return sizes(); }
|
||||
|
||||
private:
|
||||
std::vector<int64_t> view_size;
|
||||
};
|
||||
|
||||
class TorchDataNode : public Node {
|
||||
|
||||
public:
|
||||
TorchDataNode(at::Tensor tensor)
|
||||
: Node(ir::OpKind::Get("aten::torch_data"), {}, tensor.sizes()),
|
||||
tensor_(std::move(tensor)) {}
|
||||
|
||||
at::Tensor tensor() { return tensor_; }
|
||||
|
||||
private:
|
||||
at::Tensor tensor_;
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace torch_mlir
|
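To show how `Value`, `OpKind`, and the concrete node classes above fit together, here is a small hypothetical sketch (not part of the original header) that builds a two-operand add graph from two tensors; only classes declared above are used:

```cpp
// Hypothetical sketch building a tiny graph with the node classes above.
#include "ir.h"

torch_mlir::ir::Value buildAddGraph(const at::Tensor &a, const at::Tensor &b) {
  using namespace torch_mlir::ir;
  // Leaf nodes wrap the concrete input tensors.
  Value lhs(std::make_shared<TorchDataNode>(a));
  Value rhs(std::make_shared<TorchDataNode>(b));
  // Scalars are modeled as constant nodes.
  Value alpha(std::make_shared<ConstantNode>(at::Scalar(1)));
  // AddNode records its operands; its output sizes come from the first one.
  return Value(std::make_shared<AddNode>(lhs, rhs, alpha));
}
```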
|
@ -1,333 +0,0 @@
|
|||
//===- jit.cpp --------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// This file drives the generation and lowering of MLIR, followed by JIT
|
||||
// compiling the resulting LLVM dialect.
|
||||
|
||||
#include "npcomp/Dialect/ATen/ATenDialect.h"
|
||||
#include "npcomp/Dialect/ATen/ATenPasses.h"
|
||||
|
||||
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
|
||||
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/ExecutionEngine/ExecutionEngine.h"
|
||||
#include "mlir/ExecutionEngine/JitRunner.h"
|
||||
#include "mlir/ExecutionEngine/OptUtils.h"
|
||||
#include "mlir/IR/Attributes.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Function.h"
|
||||
#include "mlir/IR/Location.h"
|
||||
#include "mlir/IR/MLIRContext.h"
|
||||
#include "mlir/IR/Module.h"
|
||||
#include "mlir/IR/StandardTypes.h"
|
||||
#include "mlir/IR/Types.h"
|
||||
#include "mlir/IR/Verifier.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/Pass/PassManager.h"
|
||||
#include "mlir/Target/LLVMIR.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <cstdlib>
#include <dlfcn.h>
|
||||
|
||||
#include "ATen/ArrayRef.h"
|
||||
namespace at {
|
||||
template <typename T> using ArrayRef = c10::ArrayRef<T>;
|
||||
}
|
||||
#include "ATen/Tensor.h"
|
||||
#include <ATen/CPUType.h>
|
||||
|
||||
#include "jit.h"
|
||||
#include "mlir_gen.h"
|
||||
#include "tensor.h"
|
||||
#include "torch_util.h"
|
||||
|
||||
#define DEBUG_TYPE "torch_mlir"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace torch_mlir {
|
||||
|
||||
namespace {
|
||||
|
||||
int LowerATenDialect(mlir::ModuleOp module) {
|
||||
PassManager pm0(module.getContext());
|
||||
pm0.addPass(mlir::createCSEPass());
|
||||
|
||||
// Lower to function calls.
|
||||
pm0.addPass(mlir::NPCOMP::aten::createATenLoweringPass());
|
||||
pm0.addPass(mlir::NPCOMP::aten::createReturnEliminationPass());
|
||||
|
||||
if (failed(pm0.run(module))) {
|
||||
llvm::errs() << "aten to loops conversion failed ";
|
||||
return 1;
|
||||
}
|
||||
|
||||
PassManager pm1(module.getContext());
|
||||
pm1.addPass(mlir::createLowerAffinePass());
|
||||
pm1.addPass(mlir::createLowerToCFGPass());
|
||||
pm1.addPass(mlir::createCSEPass());
|
||||
|
||||
if (failed(pm1.run(module))) {
|
||||
llvm::errs() << "loops to std conversion failed ";
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int LowerStdDialect(mlir::ModuleOp module) {
|
||||
PassManager pm(module.getContext());
|
||||
|
||||
struct LowerToLLVMOptions options;
|
||||
options.emitCWrappers = true;
|
||||
LLVM_DEBUG(module.print(llvm::outs()));
|
||||
|
||||
pm.addPass(mlir::createLowerToLLVMPass(options));
|
||||
pm.addPass(mlir::createCSEPass());
|
||||
|
||||
LLVM_DEBUG(module.print(llvm::outs()));
|
||||
|
||||
if (failed(pm.run(module))) {
|
||||
llvm::errs() << "std to llvm conversion failed ";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!module)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename T, int N> struct llvm_tensor_t {
|
||||
T *d;
|
||||
T *aligned;
|
||||
size_t offset;
|
||||
size_t shape[N];
|
||||
size_t stride[N];
|
||||
};
|
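The `llvm_tensor_t` layout above mirrors MLIR's strided memref descriptor (allocated pointer, aligned pointer, offset, sizes, strides), which is what the C-interface wrappers requested via `emitCWrappers` below operate on. A minimal sketch, assuming one rank-2 f32 argument and one rank-2 f32 result, of how the JITed entry point ends up being invoked with packed arguments:

```cpp
// Hypothetical sketch of the packed calling convention used further below:
// each entry of the void** array points at a pointer to a descriptor.
llvm_tensor_t<float, 2> in_desc;  // would be filled in by setupArg
llvm_tensor_t<float, 2> out_desc; // result buffer descriptor
llvm_tensor_t<float, 2> *in_ptr = &in_desc;
llvm_tensor_t<float, 2> *out_ptr = &out_desc;
void *args[2] = {&in_ptr, &out_ptr};
// fptr is the function pointer looked up from the ExecutionEngine below:
// fptr(args);
```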
||||
|
||||
template <typename T, int N> void *setupArg(at::Tensor &t) {
|
||||
llvm_tensor_t<T, N> *arg = new llvm_tensor_t<T, N>;
|
||||
llvm_tensor_t<T, N> **arg_storage = new llvm_tensor_t<T, N> *;
|
||||
*arg_storage = arg;
|
||||
arg->d = arg->aligned = (T *)t.data_ptr();
|
||||
arg->offset = 0;
|
||||
assert(t.dim() == N);
|
||||
for (int j = 0; j < N; j++) {
|
||||
arg->shape[j] = t.sizes()[j];
|
||||
arg->stride[j] = t.stride(j);
|
||||
}
|
||||
return (void *)arg_storage;
|
||||
}
|
||||
|
||||
at::Tensor LowerAndRun(mlir::ModuleOp module,
|
||||
std::vector<at::Tensor> &arguments, const ir::Value &v,
|
||||
mlir::MLIRContext &context) {
|
||||
|
||||
LowerATenDialect(module);
|
||||
LowerStdDialect(module);
|
||||
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel =
|
||||
llvm::CodeGenOpt::Level::Aggressive;
|
||||
std::string libpath;
|
||||
if (const char *path = std::getenv("TEST_BUILD_PATH")) {
|
||||
libpath = path;
|
||||
}
|
||||
|
||||
std::vector<std::string> sharedLibs{libpath +
|
||||
"/frontends/pytorch/lib/libaten_ops.so"};
|
||||
llvm::errs() << "Loading " << sharedLibs[0] << "\n";
|
||||
|
||||
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
|
||||
|
||||
llvm::SmallVector<llvm::StringRef, 1> libs(sharedLibs.begin(),
|
||||
sharedLibs.end());
|
||||
auto expectedEngine = mlir::ExecutionEngine::create(
|
||||
module, {}, jitCodeGenOptLevel, libs, false, false, false);
|
||||
assert(expectedEngine && "no engine, cannot fly");
|
||||
|
||||
llvm::StringRef entryPoint("_mlir_ciface_graph");
|
||||
auto engine = std::move(*expectedEngine);
|
||||
auto expectedFPtr = engine->lookup(entryPoint);
|
||||
assert(expectedFPtr && "entryPoint missing");
|
||||
|
||||
void (*fptr)(void **) = *expectedFPtr;
|
||||
|
||||
// this array holds pointers to the function arguments
|
||||
void **args = (void **)malloc((arguments.size() + 1) * sizeof(void *));
|
||||
|
||||
// allocate and setup the function arguments
|
||||
for (int i = 0, e = arguments.size(); i < e; i++) {
|
||||
at::Tensor &t = arguments[i];
|
||||
auto dtype = t.dtype();
|
||||
int dim = t.dim();
|
||||
if (dim == 4) {
|
||||
if (dtype == at::kFloat)
|
||||
args[i] = setupArg<float, 4>(t);
|
||||
else if (dtype == at::kLong)
|
||||
args[i] = setupArg<uint64_t, 4>(t);
|
||||
else
|
||||
assert(0);
|
||||
} else if (dim == 3) {
|
||||
if (dtype == at::kFloat)
|
||||
args[i] = setupArg<float, 3>(t);
|
||||
else if (dtype == at::kLong)
|
||||
args[i] = setupArg<uint64_t, 3>(t);
|
||||
else
|
||||
assert(0);
|
||||
} else if (dim == 2) {
|
||||
if (dtype == at::kFloat)
|
||||
args[i] = setupArg<float, 2>(t);
|
||||
else if (dtype == at::kLong)
|
||||
args[i] = setupArg<uint64_t, 2>(t);
|
||||
else
|
||||
assert(0);
|
||||
} else if (dim == 1) {
|
||||
if (dtype == at::kFloat)
|
||||
args[i] = setupArg<float, 1>(t);
|
||||
else if (dtype == at::kLong)
|
||||
args[i] = setupArg<uint64_t, 1>(t);
|
||||
else
|
||||
assert(0);
|
||||
} else {
|
||||
assert(0 && "unhandled dim");
|
||||
}
|
||||
}
|
||||
|
||||
// allocate the result tensors
|
||||
// TODO: num results > 1
|
||||
at::Tensor result = util::Zeros(v.sizes(), at::kFloat);
|
||||
if (result.dim() == 4) {
|
||||
args[arguments.size()] = setupArg<float, 4>(result);
|
||||
} else if (result.dim() == 3) {
|
||||
args[arguments.size()] = setupArg<float, 3>(result);
|
||||
} else if (result.dim() == 2) {
|
||||
args[arguments.size()] = setupArg<float, 2>(result);
|
||||
} else if (result.dim() == 1) {
|
||||
args[arguments.size()] = setupArg<float, 1>(result);
|
||||
} else {
|
||||
assert(0 && "unhandled dim");
|
||||
}
|
||||
|
||||
// call the JITed function
|
||||
fptr(args);
|
||||
|
||||
// free pointers to the results
|
||||
// TODO: num results > 1
|
||||
if (result.dim() == 4) {
|
||||
auto arg_storage =
|
||||
static_cast<llvm_tensor_t<float, 4> **>(args[arguments.size()]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (result.dim() == 3) {
|
||||
auto arg_storage =
|
||||
static_cast<llvm_tensor_t<float, 3> **>(args[arguments.size()]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (result.dim() == 2) {
|
||||
auto arg_storage =
|
||||
static_cast<llvm_tensor_t<float, 2> **>(args[arguments.size()]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (result.dim() == 1) {
|
||||
auto arg_storage =
|
||||
static_cast<llvm_tensor_t<float, 1> **>(args[arguments.size()]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else {
|
||||
assert(0 && "unhandled dim");
|
||||
}
|
||||
|
||||
// free pointers to the arguments
|
||||
for (int i = 0, e = arguments.size(); i < e; i++) {
|
||||
at::Tensor &t = arguments[i];
|
||||
int dim = t.dim();
|
||||
if (dim == 4) {
|
||||
auto arg_storage = static_cast<llvm_tensor_t<float, 4> **>(args[i]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (dim == 3) {
|
||||
auto arg_storage = static_cast<llvm_tensor_t<float, 3> **>(args[i]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (dim == 2) {
|
||||
auto arg_storage = static_cast<llvm_tensor_t<float, 2> **>(args[i]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else if (dim == 1) {
|
||||
auto arg_storage = static_cast<llvm_tensor_t<float, 1> **>(args[i]);
|
||||
auto arg = *arg_storage;
|
||||
delete arg;
|
||||
delete arg_storage;
|
||||
} else {
|
||||
assert(0 && "unhandled dim");
|
||||
}
|
||||
}
|
||||
|
||||
// free the array of void* ptrs
|
||||
free(args);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
at::Tensor JitAndRun(const ir::Value &v, mlir::MLIRContext &context) {
|
||||
|
||||
// generate the MLIR
|
||||
std::vector<ir::Value> vs{v};
|
||||
auto mlir_gen = MLIRGen(context).genModule(vs);
|
||||
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
|
||||
std::vector<at::Tensor> arguments = std::move(std::get<1>(mlir_gen));
|
||||
|
||||
return LowerAndRun(module.get(), arguments, v, context);
|
||||
}
|
||||
|
||||
at::Tensor JitAndRun(const ir::Value &v) {
|
||||
mlir::MLIRContext context;
|
||||
return JitAndRun(v, context);
|
||||
}
|
||||
|
||||
at::Tensor Interpret(const ir::Value &v) { assert(0 && "unsupported"); }
|
||||
} // anonymous namespace
|
||||
|
||||
// FIXME: Why is this code here and not in tensor.cpp?
|
||||
std::string MLIRTensor::GetMLIR() const {
|
||||
|
||||
// generate the MLIR
|
||||
mlir::MLIRContext context;
|
||||
ir::Value ir_value = CurrentIrValue();
|
||||
if (!ir_value)
|
||||
return "<tensor>";
|
||||
|
||||
std::vector<ir::Value> vs{ir_value};
|
||||
auto mlir_gen = MLIRGen(context).genModule(vs);
|
||||
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
|
||||
|
||||
std::string aten;
|
||||
llvm::raw_string_ostream ss(aten);
|
||||
module->print(ss);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
at::Tensor MLIRTensor::CompileAndRun() const {
|
||||
return JitAndRun(CurrentIrValue());
|
||||
}
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,16 +0,0 @@
|
|||
//===- jit.h ----------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace torch_mlir {
|
||||
// namespace jit {
|
||||
|
||||
// at::Tensor CompileAndRun(const MLIRTensor &tensor);
|
||||
// at::Tensor JitAndRun(const ir::Value &v);
|
||||
//}
|
||||
} // namespace torch_mlir
|
|
@ -1,214 +0,0 @@
|
|||
//===- mlir_gen.cpp ---------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Function.h"
|
||||
#include "mlir/IR/Location.h"
|
||||
#include "mlir/IR/MLIRContext.h"
|
||||
#include "mlir/IR/Module.h"
|
||||
#include "mlir/IR/StandardTypes.h"
|
||||
#include "mlir/IR/Types.h"
|
||||
#include "mlir/IR/Verifier.h"
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#include "npcomp/Dialect/ATen/ATenDialect.h"
|
||||
|
||||
#include "ATen/ArrayRef.h"
|
||||
namespace at {
|
||||
template <typename T> using ArrayRef = c10::ArrayRef<T>;
|
||||
}
|
||||
#include "ATen/Tensor.h"
|
||||
|
||||
#include "ir.h"
|
||||
#include "mlir_gen.h"
|
||||
|
||||
#include <iostream>
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#define DEBUG_TYPE "torch_mlir"
|
||||
|
||||
namespace torch_mlir {
|
||||
|
||||
MLIRGen::MLIRGen(mlir::MLIRContext &context) : context(context) {
|
||||
context.getOrLoadDialect<mlir::NPCOMP::aten::ATenDialect>();
|
||||
context.getOrLoadDialect<mlir::StandardOpsDialect>();
|
||||
}
|
||||
|
||||
std::tuple<mlir::OwningModuleRef, std::vector<at::Tensor>>
|
||||
MLIRGen::genModule(std::vector<ir::Value> &v) {
|
||||
// the module
|
||||
module = mlir::ModuleOp::create(mlir::UnknownLoc::get(&context));
|
||||
|
||||
auto fn = genFunction(v);
|
||||
if (fn) {
|
||||
module->push_back(fn);
|
||||
if (failed(mlir::verify(*module))) {
|
||||
emitError(mlir::UnknownLoc::get(&context), "module verification error");
|
||||
}
|
||||
}
|
||||
return std::make_tuple(std::move(module), arguments);
|
||||
}
|
||||
|
||||
mlir::Value MLIRGen::genValue(const ir::Value &v) {
|
||||
|
||||
if (symbolTable.count(v))
|
||||
return symbolTable[v];
|
||||
|
||||
LLVM_DEBUG(llvm::dbgs() << "genValue node: " << v.node->op() << "\n");
|
||||
|
||||
ir::NodePtr node = v.node;
|
||||
auto loc = mlir::UnknownLoc::get(&context);
|
||||
|
||||
for (auto &operand : node->operands())
|
||||
genValue(operand);
|
||||
|
||||
mlir::Value mlirValue = nullptr;
|
||||
if (opTable.count(v.node)) {
|
||||
mlirValue = opTable[v.node]->getResult(v.index);
|
||||
} else {
|
||||
mlir::Operation *mlirOp = node->genMLIR(builder, context, symbolTable);
|
||||
opTable.insert({v.node, mlirOp});
|
||||
assert(mlirOp && "failed to generate mlir op");
|
||||
mlirValue = mlirOp->getResult(v.index);
|
||||
}
|
||||
|
||||
declareSymbol(v, mlirValue);
|
||||
|
||||
return mlirValue;
|
||||
}
|
||||
|
||||
// generate function parameters for the IR rooted at v
|
||||
void MLIRGen::genParameters(const ir::Value &v, std::set<ir::Value> &visited) {
|
||||
ir::NodePtr node = v.node;
|
||||
if (visited.count(v))
|
||||
return;
|
||||
visited.insert(v);
|
||||
for (const ir::Value &operand : node->operands()) {
|
||||
// if the operand is a leaf
|
||||
if (operand.node->op() == ir::OpKind::Get("aten::torch_data")) {
|
||||
parameters.push_back(operand);
|
||||
} else {
|
||||
genParameters(operand, visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mlir::FuncOp MLIRGen::genFunction(std::vector<ir::Value> &vs) {
|
||||
|
||||
auto loc = mlir::UnknownLoc::get(&context);
|
||||
|
||||
auto gen_tensor_ty = [&](const ir::Value &v) {
|
||||
auto shape = v.sizes();
|
||||
auto tdn = dynamic_cast<ir::TorchDataNode *>(v.node.get());
|
||||
mlir::Type elemTy;
|
||||
if (tdn) {
|
||||
auto dtype = tdn->tensor().dtype();
|
||||
if (dtype == at::kFloat)
|
||||
elemTy = mlir::FloatType::getF32(&context);
|
||||
else if (dtype == at::kDouble)
|
||||
elemTy = mlir::FloatType::getF64(&context);
|
||||
else if (dtype == at::kLong)
|
||||
elemTy = mlir::IntegerType::get(64, &context);
|
||||
else if (dtype == at::kInt)
|
||||
elemTy = mlir::IntegerType::get(32, &context);
|
||||
else if (dtype == at::kShort)
|
||||
elemTy = mlir::IntegerType::get(16, &context);
|
||||
else if (dtype == at::kChar || dtype == at::kByte)
|
||||
elemTy = mlir::IntegerType::get(8, &context);
|
||||
else {
|
||||
std::cout << tdn->tensor().dtype() << "\n";
|
||||
assert(0 && "bad type");
|
||||
}
|
||||
} else {
|
||||
elemTy = mlir::FloatType::getF32(&context);
|
||||
}
|
||||
return mlir::RankedTensorType::get(shape, elemTy);
|
||||
};
|
||||
|
||||
std::set<ir::Value> visited;
|
||||
for (auto &v : vs)
|
||||
genParameters(v, visited);
|
||||
|
||||
std::map<ir::Value, ir::Value> parameter_map;
|
||||
std::vector<ir::Value> unique_parameters;
|
||||
|
||||
for (const ir::Value &p : parameters) {
|
||||
bool found = false;
|
||||
for (const ir::Value &q : unique_parameters) {
|
||||
if (p.node->op() == ir::OpKind::Get("aten::torch_data") &&
|
||||
q.node->op() == ir::OpKind::Get("aten::torch_data")) {
|
||||
auto &ptd = *dynamic_cast<ir::TorchDataNode *>(p.node.get());
|
||||
auto &qtd = *dynamic_cast<ir::TorchDataNode *>(q.node.get());
|
||||
if (ptd.tensor().is_same(qtd.tensor())) {
|
||||
found = true;
|
||||
parameter_map.insert({p, q});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
unique_parameters.push_back(p);
|
||||
}
|
||||
}
|
||||
|
||||
// collect the argument types and tensors
|
||||
std::vector<mlir::Type> arg_types;
|
||||
for (const ir::Value &p : unique_parameters) {
|
||||
// tensor type for the function signature
|
||||
arg_types.push_back(gen_tensor_ty(p));
|
||||
|
||||
// tensor itself for actually calling the graph
|
||||
auto tdn = dynamic_cast<ir::TorchDataNode *>(p.node.get());
|
||||
arguments.push_back(tdn->tensor());
|
||||
}
|
||||
|
||||
// construct return type
|
||||
std::vector<mlir::Type> ret_types;
|
||||
for (auto &v : vs)
|
||||
ret_types.push_back(gen_tensor_ty(v));
|
||||
|
||||
// create the function type and the function itself
|
||||
auto func_type = mlir::FunctionType::get(arg_types, ret_types, &context);
|
||||
auto function =
|
||||
mlir::FuncOp::create(loc, "graph", func_type, /* attrs = */ {});
|
||||
|
||||
// entry
|
||||
auto &entryBlock = *function.addEntryBlock();
|
||||
|
||||
// Declare all the function arguments in the symbol table.
|
||||
for (const auto &i :
|
||||
llvm::zip(unique_parameters, entryBlock.getArguments())) {
|
||||
declareSymbol(std::get<0>(i), std::get<1>(i));
|
||||
}
|
||||
// Declare all the duplicates from the original
|
||||
// parameter list in the symbol table
|
||||
for (auto &k_v : parameter_map) {
|
||||
assert(symbolTable.count(k_v.second));
|
||||
declareSymbol(k_v.first, symbolTable[k_v.second]);
|
||||
}
|
||||
|
||||
builder = std::make_unique<mlir::OpBuilder>(function.getBody());
|
||||
|
||||
std::vector<mlir::Value> rets;
|
||||
for (auto &v : vs)
|
||||
rets.push_back(genValue(v));
|
||||
|
||||
builder->create<mlir::ReturnOp>(loc, rets);
|
||||
return function;
|
||||
}
|
||||
|
||||
bool MLIRGen::declareSymbol(const ir::Value &irValue, mlir::Value mlirValue) {
|
||||
if (symbolTable.count(irValue)) {
|
||||
return false;
|
||||
}
|
||||
symbolTable.insert({irValue, mlirValue});
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,45 +0,0 @@
//===- mlir_gen.h -----------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

#pragma once

#include "mlir/IR/MLIRContext.h"

#include "ir.h"

namespace torch_mlir {

/// This class generates MLIR from a pytorch graph
class MLIRGen {

public:
  MLIRGen(mlir::MLIRContext &context);

  // Generate an MLIR model that computes the given outputs.
  std::tuple<mlir::OwningModuleRef, std::vector<at::Tensor>>
  genModule(std::vector<ir::Value> &v);

private:
  mlir::Value genValue(const ir::Value &v);

  void genParameters(const ir::Value &v, std::set<ir::Value> &visited);

  mlir::FuncOp genFunction(std::vector<ir::Value> &v);

  bool declareSymbol(const ir::Value &irValue, mlir::Value mlirValue);

private:
  mlir::MLIRContext &context;
  mlir::OwningModuleRef module;
  std::unique_ptr<mlir::OpBuilder> builder;
  std::map<const ir::Value, mlir::Value> symbolTable;
  std::map<const ir::NodePtr, mlir::Operation *> opTable;
  std::vector<ir::Value> parameters;
  std::vector<at::Tensor> arguments;
};

} // namespace torch_mlir
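The genModule entry point above is exercised by MLIRTensor::GetMLIR earlier in this change. The sketch below restates that flow as a standalone helper; `DumpValueAsMLIR` is an illustrative name, not part of the deleted sources.

```cpp
#include <string>
#include <vector>

#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"

#include "ir.h"
#include "mlir_gen.h"

namespace torch_mlir {

// Builds a module for a single traced output value and returns its textual
// form, mirroring what MLIRTensor::GetMLIR does.
std::string DumpValueAsMLIR(const ir::Value &output) {
  mlir::MLIRContext context;
  std::vector<ir::Value> outputs{output};

  // genModule returns the module plus the at::Tensor arguments that feed the
  // generated "graph" function.
  auto result = MLIRGen(context).genModule(outputs);
  mlir::OwningModuleRef module = std::move(std::get<0>(result));

  std::string text;
  llvm::raw_string_ostream os(text);
  module->print(os);
  return os.str();
}

} // namespace torch_mlir
```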
@ -1,137 +0,0 @@
|
|||
//===- init_python_bindings.cpp ---------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// This file implements Python bindings to the MLIR/NPCOMP ATen dialect.
|
||||
// Roughly speaking, it enables something like this:
|
||||
//
|
||||
// dev = torch_mlir.mlir_device()
|
||||
// t0 = torch.randn((4,4), device=dev)
|
||||
// t1 = torch.randn((4,4), device=dev)
|
||||
// t2 = t0 + t1
|
||||
// t2_mlir = torch_mlir.get_mlir( t2 )
|
||||
// t2_cpu = t2.to('cpu')
|
||||
//
|
||||
// In this case t2_cpu contains the result of the computation, and t2_mlir
|
||||
// contains the mlir description of the computation.
|
||||
|
||||
#include "../pybind.h"
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
|
||||
#include "mlir/IR/MLIRContext.h"
|
||||
#include "mlir/IR/Module.h"
|
||||
#include "mlir/IR/Verifier.h"
|
||||
#include "mlir/Parser.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/Pass/PassManager.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
|
||||
#include "npcomp/Dialect/ATen/ATenDialect.h"
|
||||
#include "npcomp/Dialect/ATen/ATenOpReport.h"
|
||||
#include "npcomp/Dialect/ATen/ATenPasses.h"
|
||||
#include "npcomp/Dialect/ATen/LivenessReport.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
// Then ATen headers with workarounds
|
||||
#include "ATen/ArrayRef.h"
|
||||
namespace at {
|
||||
template <typename T> using ArrayRef = c10::ArrayRef<T>;
|
||||
}
|
||||
#include "ATen/SmallVector.h"
|
||||
namespace at {
|
||||
template <typename T, int S> using SmallVector = c10::SmallVector<T, S>;
|
||||
}
|
||||
#include <ATen/Tensor.h>
|
||||
|
||||
// other headers
|
||||
|
||||
#include "aten_mlir_bridge.h"
|
||||
#include "aten_mlir_type.h"
|
||||
#include "init_python_bindings.h"
|
||||
#include "mlir_gen.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace llvm {
|
||||
extern bool DebugFlag;
|
||||
}
|
||||
|
||||
namespace torch_mlir {
|
||||
namespace {
|
||||
|
||||
mlir::OwningModuleRef LoadModule(mlir::MLIRContext &context, std::string mlir) {
|
||||
|
||||
mlir::OwningModuleRef module;
|
||||
|
||||
std::unique_ptr<llvm::MemoryBuffer> membuf =
|
||||
llvm::MemoryBuffer::getMemBuffer(mlir);
|
||||
|
||||
llvm::SourceMgr sourceMgr;
|
||||
sourceMgr.AddNewSourceBuffer(std::move(membuf), llvm::SMLoc());
|
||||
module = mlir::parseSourceFile(sourceMgr, &context);
|
||||
|
||||
if (!module) {
  llvm::errs() << "Error: can't parse MLIR module\n";
  return nullptr;
}
if (failed(mlir::verify(*module))) {
  llvm::errs() << "Error verifying MLIR module\n";
  return nullptr;
}
return module;
}
|
||||
|
||||
void InitModuleBindings(py::module &m) {
|
||||
m.def("_initialize_aten_bindings",
|
||||
[]() { ATenMLIRType::InitializeAtenBindings(); });
|
||||
m.def("_set_default_device", []() {});
|
||||
|
||||
m.def("_get_mlir", [](std::vector<at::Tensor> &ts) -> std::string {
|
||||
if (ts.size() == 0)
|
||||
return std::string();
|
||||
|
||||
mlir::MLIRContext context;
|
||||
|
||||
// gather IR for all the tensors
|
||||
std::vector<ir::Value> recorded_ir;
|
||||
for (auto &t : ts)
|
||||
if (c10::optional<MLIRTensor> at = bridge::TryGetMLIRTensor(t))
|
||||
recorded_ir.push_back(at->GetIrValue());
|
||||
|
||||
// generate MLIR from IR
|
||||
auto mlir_gen = MLIRGen(context).genModule(recorded_ir);
|
||||
mlir::OwningModuleRef module = std::move(std::get<0>(mlir_gen));
|
||||
|
||||
mlir::PassManager pm(module->getContext());
|
||||
|
||||
pm.addPass(mlir::createCSEPass());
|
||||
pm.addPass(mlir::NPCOMP::aten::createATenLayerNamePass());
|
||||
if (failed(pm.run(*module))) {
|
||||
llvm::errs() << "ATenLayerNamePass failed";
|
||||
return "<error>";
|
||||
}
|
||||
|
||||
// dump MLIR to string and return
|
||||
std::string s;
|
||||
llvm::raw_string_ostream ss(s);
|
||||
module->print(ss);
|
||||
return ss.str();
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void InitTypeDispatchBindings(py::module &m) { InitModuleBindings(m); }
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,613 +0,0 @@
|
|||
//===- tensor.cpp -----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#include "ATen/ArrayRef.h"
|
||||
namespace at {
|
||||
template <typename T> using ArrayRef = c10::ArrayRef<T>;
|
||||
}
|
||||
#include "ATen/Tensor.h"
|
||||
|
||||
#include "jit.h"
|
||||
#include "tensor.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#define DEBUG_TYPE "torch_mlir"
|
||||
|
||||
namespace torch_mlir {
|
||||
|
||||
MLIRTensor MLIRTensor::Create(const at::Tensor &tensor, const Device &device) {
|
||||
assert(tensor.device().type() == at::kCPU);
|
||||
MLIRTensor device_tensor(tensor, device);
|
||||
return device_tensor;
|
||||
}
|
||||
|
||||
MLIRTensor
|
||||
MLIRTensor::Create(ir::Value ir_value, const Device &device,
|
||||
c10::optional<at::ScalarType> logical_element_type) {
|
||||
MLIRTensor device_tensor(std::move(ir_value), device, logical_element_type);
|
||||
return device_tensor;
|
||||
}
|
||||
|
||||
MLIRTensor::MLIRTensor(const at::Tensor &tensor, const Device &device)
|
||||
: data_(std::make_shared<Data>(tensor, device)) {}
|
||||
|
||||
MLIRTensor::MLIRTensor(ir::Value ir_value, const Device &device,
|
||||
c10::optional<at::ScalarType> logical_element_type)
|
||||
: data_(std::make_shared<Data>(std::move(ir_value), device,
|
||||
logical_element_type)) {}
|
||||
|
||||
MLIRTensor::Data *MLIRTensor::data() const {
|
||||
assert(data_ != nullptr && "Trying to access null data");
|
||||
return data_.get();
|
||||
}
|
||||
|
||||
at::ScalarType MLIRTensor::dtype() const {
|
||||
return data()->logical_element_type ? *data()->logical_element_type
|
||||
: at::ScalarType::Float;
|
||||
}
|
||||
|
||||
const Device &MLIRTensor::GetDevice() const { return data()->device; }
|
||||
|
||||
uint64_t MLIRTensor::GetNextTensorId() {
|
||||
static std::atomic<uint64_t> *id_generator = new std::atomic<uint64_t>(1);
|
||||
return id_generator->fetch_add(1);
|
||||
}
|
||||
|
||||
void MLIRTensor::SetTensorData(at::Tensor tensor_data) {
|
||||
data()->tensor_data = std::move(tensor_data);
|
||||
}
|
||||
|
||||
ir::Value MLIRTensor::GetIrValue() const {
|
||||
ir::Value ir_value = CurrentIrValue();
|
||||
if (ir_value) {
|
||||
return ir_value;
|
||||
}
|
||||
c10::optional<at::Tensor> tensor_data = CurrentTensorData();
|
||||
if (tensor_data) {
|
||||
at::Tensor tensor = *tensor_data;
|
||||
if (!tensor.dim()) {
|
||||
auto dtype = tensor.dtype();
|
||||
if (dtype == at::kFloat) {
|
||||
auto d = tensor.data_ptr<float>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
} else if (dtype == at::kDouble) {
|
||||
auto d = tensor.data_ptr<double>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
} else if (dtype == at::kLong) {
|
||||
auto d = tensor.data_ptr<int64_t>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
} else if (dtype == at::kInt) {
|
||||
auto d = tensor.data_ptr<int32_t>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
} else if (dtype == at::kShort) {
|
||||
auto d = tensor.data_ptr<int16_t>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
} else if (dtype == at::kChar || dtype == at::kByte) {
|
||||
auto d = tensor.data_ptr<int8_t>();
|
||||
return ir::Value(std::make_shared<ir::ConstantNode>(d[0]));
|
||||
}
|
||||
// fall through to TorchDataNode below
|
||||
}
|
||||
return ir::Value(std::make_shared<ir::TorchDataNode>(*tensor_data));
|
||||
}
|
||||
assert(0 && "Could not create ir value from leaf tensor");
|
||||
return ir::Value();
|
||||
}
|
||||
|
||||
ir::Value MLIRTensor::CurrentIrValue() const { return data()->ir_value; }
|
||||
|
||||
void MLIRTensor::SetIrValue(ir::Value ir_value) {
|
||||
data()->generation += 1;
|
||||
data()->ir_value = std::move(ir_value);
|
||||
}
|
||||
|
||||
c10::optional<at::Tensor> MLIRTensor::CurrentTensorData() const {
|
||||
return data()->tensor_data;
|
||||
}
|
||||
|
||||
void MLIRTensor::SetTensor(at::Tensor tensor) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
SetTensorData(tensor);
|
||||
data()->generation += 1;
|
||||
}
|
||||
|
||||
at::Tensor MLIRTensor::ToTensor() const {
|
||||
c10::optional<at::Tensor> tensor_data = CurrentTensorData();
|
||||
if (!tensor_data)
|
||||
tensor_data = CompileAndRun();
|
||||
assert(tensor_data);
|
||||
return *tensor_data;
|
||||
}
|
||||
|
||||
void MLIRTensor::ShallowCopyTo(MLIRTensor *dest) const {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
|
||||
auto data = CurrentTensorData();
|
||||
if (data)
|
||||
dest->SetTensor(*data);
|
||||
else
|
||||
dest->SetIrValue(CurrentIrValue());
|
||||
|
||||
dest->SetScalarType(dtype());
|
||||
assert(GetDevice() == dest->GetDevice());
|
||||
}
|
||||
|
||||
void MLIRTensor::SetScalarType(
|
||||
c10::optional<at::ScalarType> logical_element_type) {
|
||||
data()->logical_element_type = logical_element_type;
|
||||
}
|
||||
|
||||
std::vector<int64_t> MLIRTensor::sizes() const {
|
||||
if (data()->ir_value) {
|
||||
return data()->ir_value.sizes();
|
||||
}
|
||||
assert(data()->tensor_data && "tensor has no shape information");
|
||||
if (data()->tensor_data) {
|
||||
auto s = data()->tensor_data->sizes();
|
||||
return {s.begin(), s.end()};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<int64_t> MLIRTensor::strides() const {
|
||||
if (data()->ir_value) {
|
||||
return data()->ir_value.strides();
|
||||
}
|
||||
assert(data()->tensor_data && "tensor has no shape information");
|
||||
if (data()->tensor_data) {
|
||||
auto s = data()->tensor_data->strides();
|
||||
return {s.begin(), s.end()};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::CreateFrom(ir::Value ir_value) const {
|
||||
return Create(std::move(ir_value), GetDevice(), dtype());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// aten tensor methods
|
||||
////////////////////////////////////////////
|
||||
|
||||
MLIRTensor MLIRTensor::_adaptive_avg_pool2d(const MLIRTensor &self,
|
||||
at::IntArrayRef output_size) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::AdaptiveAvgPool2dNode>(
|
||||
self.GetIrValue(), output_size);
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor
|
||||
MLIRTensor::_adaptive_avg_pool2d_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::AdaptiveAvgPool2dBackwardNode>(
|
||||
grad_output.GetIrValue(), self.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::add(const MLIRTensor &self, const MLIRTensor &other,
|
||||
at::Scalar alpha) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddNode>(
|
||||
self.GetIrValue(), other.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::add_(MLIRTensor &self, const MLIRTensor &other,
|
||||
at::Scalar alpha) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddInPlaceNode>(
|
||||
self.GetIrValue(), other.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::addmm(const MLIRTensor &input, const MLIRTensor &mat1,
|
||||
const MLIRTensor &mat2, at::Scalar beta,
|
||||
at::Scalar alpha) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::AddmmNode>(
|
||||
input.GetIrValue(), mat1.GetIrValue(), mat2.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(beta)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::as_strided(const MLIRTensor &input, at::IntArrayRef size,
|
||||
at::IntArrayRef stride,
|
||||
c10::optional<int64_t> storage_offset) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::AsStridedNode>(
|
||||
input.GetIrValue(), size, stride, storage_offset);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::clone(const MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
return MLIRTensor::Create(std::move(input.ToTensor()), input.GetDevice());
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::convolution(
|
||||
const MLIRTensor &input, const MLIRTensor &weight, const MLIRTensor &bias,
|
||||
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
|
||||
bool transposed, at::IntArrayRef output_padding, int64_t groups) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::Conv2dNode>(
|
||||
input.GetIrValue(), weight.GetIrValue(), bias.GetIrValue(), stride,
|
||||
padding, dilation, transposed, output_padding, groups);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor> MLIRTensor::convolution_backward(
|
||||
const MLIRTensor &grad_output, const MLIRTensor &input,
|
||||
const MLIRTensor &weight, at::IntArrayRef stride, at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding,
|
||||
int64_t groups, std::array<bool, 3> output_mask) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::Conv2dBackwardNode>(
|
||||
grad_output.GetIrValue(), input.GetIrValue(), weight.GetIrValue(), stride,
|
||||
padding, dilation, transposed, output_padding, groups /*, output_mask*/);
|
||||
auto result0 = input.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = input.CreateFrom(ir::Value(node, 1));
|
||||
auto result2 = input.CreateFrom(ir::Value(node, 2));
|
||||
return std::make_tuple(result0, result1, result2);
|
||||
}
|
||||
|
||||
void MLIRTensor::copy_(MLIRTensor &self, MLIRTensor &src) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
src.ShallowCopyTo(&self);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::div(const MLIRTensor &self, at::Scalar other) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::DivNode>(
|
||||
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(other)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::div(const MLIRTensor &self, const MLIRTensor &other) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::DivNode>(self.GetIrValue(), other.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::div_(MLIRTensor &self, const MLIRTensor &other) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::DivInPlaceNode>(
|
||||
self.GetIrValue(), other.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::expand(const MLIRTensor &self, at::IntArrayRef size,
|
||||
bool implicit) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::ExpandNode>(self.GetIrValue(), size, implicit);
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::gather(const MLIRTensor &self, int64_t dim,
|
||||
const MLIRTensor &index, bool sparse_grad) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::GatherNode>(
|
||||
self.GetIrValue(), dim, index.GetIrValue(), sparse_grad);
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::hardtanh(const MLIRTensor &self, at::Scalar min_val,
|
||||
at::Scalar max_val) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhNode>(
|
||||
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::hardtanh_(MLIRTensor &self, at::Scalar min_val,
|
||||
at::Scalar max_val) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhInPlaceNode>(
|
||||
self.GetIrValue(), ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::hardtanh_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self,
|
||||
at::Scalar min_val,
|
||||
at::Scalar max_val) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::HardtanhBackwardNode>(
|
||||
grad_output.GetIrValue(), self.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(min_val)),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(max_val)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::_log_softmax(const MLIRTensor &input, int64_t dim,
|
||||
bool half_to_float) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::LogSoftmaxNode>(
|
||||
input.GetIrValue(), dim, half_to_float);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::_log_softmax_backward_data(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &output,
|
||||
int64_t dim,
|
||||
const MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::LogSoftmaxBackwardNode>(
|
||||
grad_output.GetIrValue(), output.GetIrValue(), dim, input.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor> MLIRTensor::max_pool2d_with_indices(
|
||||
const MLIRTensor &input, at::IntArrayRef kernel_size,
|
||||
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation,
|
||||
bool ceil_mode) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MaxPool2dWithIndicesNode>(
|
||||
input.GetIrValue(), kernel_size, stride, padding, dilation,
|
||||
ceil_mode);
|
||||
auto result0 = input.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = input.CreateFrom(ir::Value(node, 1));
|
||||
return std::make_tuple(result0, result1);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::max_pool2d_with_indices_backward(
|
||||
const MLIRTensor &grad_output, const MLIRTensor &input,
|
||||
at::IntArrayRef kernel_size, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
|
||||
const MLIRTensor &indices) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MaxPool2dWithIndicesBackwardNode>(
|
||||
grad_output.GetIrValue(), input.GetIrValue(), kernel_size, stride,
|
||||
padding, dilation, ceil_mode, indices.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::mean(const MLIRTensor &input,
|
||||
c10::optional<at::ScalarType> dtype) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MeanNode>(input.GetIrValue(), dtype);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::mean(const MLIRTensor &input, at::IntArrayRef dim,
|
||||
bool keepdim, c10::optional<at::ScalarType> dtype) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MeanNode>(input.GetIrValue(), dim, keepdim, dtype);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::mm(const MLIRTensor &input, const MLIRTensor &mat1) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MMNode>(input.GetIrValue(), mat1.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::mul(const MLIRTensor &self, const MLIRTensor &other) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::MulNode>(self.GetIrValue(), other.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::mul_(MLIRTensor &self, const MLIRTensor &other) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::MulInPlaceNode>(
|
||||
self.GetIrValue(), other.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor> MLIRTensor::native_batch_norm(
|
||||
const MLIRTensor &self, const MLIRTensor &weight, const MLIRTensor &bias,
|
||||
const MLIRTensor &running_mean, const MLIRTensor &running_var,
|
||||
bool training, double momentum, double eps) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::BatchNormNode>(
|
||||
self.GetIrValue(), weight.GetIrValue(), bias.GetIrValue(),
|
||||
running_mean.GetIrValue(), running_var.GetIrValue(), training, momentum,
|
||||
eps);
|
||||
auto result0 = self.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = self.CreateFrom(ir::Value(node, 1));
|
||||
auto result2 = self.CreateFrom(ir::Value(node, 2));
|
||||
return std::make_tuple(result0, result1, result2);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
|
||||
MLIRTensor::native_batch_norm_backward(
|
||||
const MLIRTensor &grad_out, const MLIRTensor &input,
|
||||
const MLIRTensor &weight, const MLIRTensor &running_mean,
|
||||
const MLIRTensor &running_var, const MLIRTensor &save_mean,
|
||||
const MLIRTensor &save_invstd, bool train, double eps,
|
||||
std::array<bool, 3> output_mask) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::BatchNormBackwardNode>(
|
||||
grad_out.GetIrValue(), input.GetIrValue(), weight.GetIrValue(),
|
||||
running_mean.GetIrValue(), running_var.GetIrValue(),
|
||||
save_mean.GetIrValue(), save_invstd.GetIrValue(), train, eps,
|
||||
output_mask);
|
||||
auto result0 = input.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = input.CreateFrom(ir::Value(node, 1));
|
||||
auto result2 = input.CreateFrom(ir::Value(node, 2));
|
||||
return std::make_tuple(result0, result1, result2);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::neg(const MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::NegNode>(input.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor>
|
||||
MLIRTensor::nll_loss2d_forward(const MLIRTensor &self, const MLIRTensor &target,
|
||||
const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLoss2dForwardNode>(
|
||||
self.GetIrValue(), target.GetIrValue(), weight.GetIrValue(), reduction,
|
||||
ignore_index);
|
||||
auto result0 = self.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = self.CreateFrom(ir::Value(node, 1));
|
||||
return std::make_tuple(result0, result1);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::nll_loss2d_backward(
|
||||
const MLIRTensor &grad_output, const MLIRTensor &self,
|
||||
const MLIRTensor &target, const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index, const MLIRTensor &total_weight) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLoss2dBackwardNode>(
|
||||
grad_output.GetIrValue(), self.GetIrValue(), target.GetIrValue(),
|
||||
weight.GetIrValue(), reduction, ignore_index, total_weight.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
std::tuple<MLIRTensor, MLIRTensor>
|
||||
MLIRTensor::nll_loss_forward(const MLIRTensor &self, const MLIRTensor &target,
|
||||
const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLossForwardNode>(
|
||||
self.GetIrValue(), target.GetIrValue(), weight.GetIrValue(), reduction,
|
||||
ignore_index);
|
||||
auto result0 = self.CreateFrom(ir::Value(node, 0));
|
||||
auto result1 = self.CreateFrom(ir::Value(node, 1));
|
||||
return std::make_tuple(result0, result1);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::nll_loss_backward(
|
||||
const MLIRTensor &grad_output, const MLIRTensor &self,
|
||||
const MLIRTensor &target, const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index, const MLIRTensor &total_weight) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::NllLossBackwardNode>(
|
||||
grad_output.GetIrValue(), self.GetIrValue(), target.GetIrValue(),
|
||||
weight.GetIrValue(), reduction, ignore_index, total_weight.GetIrValue());
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::sum(const MLIRTensor &input, at::IntArrayRef dim,
|
||||
bool keepdim, c10::optional<at::ScalarType> dtype) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::SumNode>(input.GetIrValue(), dim, keepdim, dtype);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::relu(const MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::ReLUNode>(input.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::relu_(MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::ReLUInPlaceNode>(input.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::size(const MLIRTensor &input, int64_t dim) {
|
||||
assert(0);
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::SizeNode>(input.GetIrValue(), dim);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::squeeze(const MLIRTensor &input, int64_t dim) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::SqueezeNode>(input.GetIrValue(), dim);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::sub(const MLIRTensor &self, const MLIRTensor &other,
|
||||
at::Scalar alpha) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::SubNode>(
|
||||
self.GetIrValue(), other.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::sub_(MLIRTensor &self, const MLIRTensor &other,
|
||||
at::Scalar alpha) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::SubInPlaceNode>(
|
||||
self.GetIrValue(), other.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(alpha)));
|
||||
return self.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::t(const MLIRTensor &input) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::TransposeNode>(input.GetIrValue());
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::threshold_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &input,
|
||||
at::Scalar threshold) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node = std::make_shared<ir::ThresholdBackwardNode>(
|
||||
grad_output.GetIrValue(), input.GetIrValue(),
|
||||
ir::Value(std::make_shared<ir::ConstantNode>(threshold)));
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::to(MLIRTensor &input, c10::optional<Device> device,
|
||||
c10::optional<at::ScalarType> scalar_type) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
if (!device) {
|
||||
device = input.GetDevice();
|
||||
}
|
||||
if (!scalar_type) {
|
||||
scalar_type = input.dtype();
|
||||
}
|
||||
|
||||
MLIRTensor new_tensor = Create(input.ToTensor(), *device);
|
||||
|
||||
if (input.dtype() != *scalar_type) {
|
||||
new_tensor.SetScalarType(*scalar_type);
|
||||
}
|
||||
return new_tensor;
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::unsqueeze(const MLIRTensor &input, int64_t dim) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::UnsqueezeNode>(input.GetIrValue(), dim);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
MLIRTensor MLIRTensor::view(const MLIRTensor &input, at::IntArrayRef size) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "MLIRTensor::" << __func__ << "\n");
|
||||
std::shared_ptr<ir::Node> node =
|
||||
std::make_shared<ir::ViewNode>(input.GetIrValue(), size);
|
||||
return input.CreateFrom(node);
|
||||
}
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,275 +0,0 @@
|
|||
//===- tensor.h -------------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "device.h"
|
||||
#include "ir.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include <ATen/Tensor.h>
|
||||
#include <c10/util/ArrayRef.h>
|
||||
|
||||
namespace torch_mlir {
|
||||
|
||||
class MLIRTensor {
|
||||
struct Data;
|
||||
|
||||
public:
|
||||
static MLIRTensor Create(const at::Tensor &tensor, const Device &device);
|
||||
static MLIRTensor Create(ir::Value ir_value, const Device &device,
|
||||
c10::optional<at::ScalarType> logical_element_type);
|
||||
|
||||
MLIRTensor() = default;
|
||||
|
||||
bool is_null() const { return data_ptr() == nullptr; }
|
||||
|
||||
void ShallowCopyTo(MLIRTensor *dest) const;
|
||||
|
||||
void SetTensor(at::Tensor tensor);
|
||||
void SetIrValue(ir::Value ir_value);
|
||||
|
||||
at::ScalarType dtype() const;
|
||||
|
||||
// Set logical_element_type which is visible to upstream PyTorch.
|
||||
void SetScalarType(c10::optional<at::ScalarType> logical_element_type);
|
||||
|
||||
std::vector<int64_t> sizes() const;
|
||||
std::vector<int64_t> strides() const;
|
||||
|
||||
at::Tensor ToTensor() const;
|
||||
|
||||
const Device &GetDevice() const;
|
||||
|
||||
size_t generation() const { return data()->generation; }
|
||||
|
||||
std::string GetMLIR() const;
|
||||
|
||||
// Retrieves the IR Node representing this MLIRTensor. One will be created if
|
||||
// missing. Note that although this is a const API, it actually changes the
|
||||
// internal state of the object.
|
||||
ir::Value GetIrValue() const;
|
||||
|
||||
at::Tensor CompileAndRun() const;
|
||||
|
||||
uint64_t id() const { return data()->unique_id; }
|
||||
|
||||
private:
|
||||
struct Data {
|
||||
Data(at::Tensor tensor_data, const Device &device)
|
||||
: logical_element_type(tensor_data.scalar_type()),
|
||||
tensor_data(std::move(tensor_data)), device(device),
|
||||
unique_id(GetNextTensorId()) {}
|
||||
|
||||
Data(ir::Value ir_value, const Device &device,
|
||||
c10::optional<at::ScalarType> logical_element_type)
|
||||
: logical_element_type(logical_element_type),
|
||||
ir_value(std::move(ir_value)), device(device),
|
||||
unique_id(GetNextTensorId()) {}
|
||||
|
||||
~Data(){};
|
||||
|
||||
c10::optional<at::ScalarType> logical_element_type;
|
||||
c10::optional<at::Tensor> tensor_data;
|
||||
ir::Value ir_value;
|
||||
|
||||
const Device device;
|
||||
const uint64_t unique_id = 0;
|
||||
size_t generation = 1;
|
||||
};
|
||||
|
||||
MLIRTensor(const at::Tensor &tensor, const Device &device);
|
||||
|
||||
MLIRTensor(ir::Value ir_value, const Device &device,
|
||||
c10::optional<at::ScalarType> logical_element_type = c10::nullopt);
|
||||
|
||||
void SetTensorData(at::Tensor tensor_data);
|
||||
|
||||
c10::optional<at::Tensor> CurrentTensorData() const;
|
||||
|
||||
// Retrieves the current IR Node, or nullptr in case no active IR Node is
|
||||
// available.
|
||||
ir::Value CurrentIrValue() const;
|
||||
|
||||
Data *data() const;
|
||||
|
||||
std::shared_ptr<Data> data_ptr() const { return data_; }
|
||||
|
||||
MLIRTensor CreateFrom(ir::Value ir_value) const;
|
||||
|
||||
static uint64_t GetNextTensorId();
|
||||
|
||||
std::shared_ptr<Data> data_;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// ATEN operators follows here, listed in alphabetical order.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
public:
|
||||
static MLIRTensor _adaptive_avg_pool2d(const MLIRTensor &self,
|
||||
at::IntArrayRef output_size);
|
||||
|
||||
static MLIRTensor _adaptive_avg_pool2d_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self);
|
||||
|
||||
static MLIRTensor add(const MLIRTensor &input, const MLIRTensor &other,
|
||||
at::Scalar alpha);
|
||||
|
||||
static MLIRTensor add_(MLIRTensor &input, const MLIRTensor &other,
|
||||
at::Scalar alpha);
|
||||
|
||||
static MLIRTensor addmm(const MLIRTensor &input, const MLIRTensor &mat1,
|
||||
const MLIRTensor &mat2, at::Scalar beta,
|
||||
at::Scalar alpha);
|
||||
|
||||
static MLIRTensor as_strided(const MLIRTensor &self, at::IntArrayRef size,
|
||||
at::IntArrayRef stride,
|
||||
c10::optional<int64_t> storage_offset);
|
||||
|
||||
static MLIRTensor clone(const MLIRTensor &self);
|
||||
|
||||
static MLIRTensor convolution(const MLIRTensor &input,
|
||||
const MLIRTensor &weight,
|
||||
const MLIRTensor &bias, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool transposed,
|
||||
at::IntArrayRef output_padding, int64_t groups);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
|
||||
convolution_backward(const MLIRTensor &grad_output, const MLIRTensor &input,
|
||||
const MLIRTensor &weight, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding, at::IntArrayRef dilation,
|
||||
bool transposed, at::IntArrayRef output_padding,
|
||||
int64_t groups, std::array<bool, 3> output_mask);
|
||||
|
||||
static void copy_(MLIRTensor &input, MLIRTensor &src);
|
||||
|
||||
static MLIRTensor div(const MLIRTensor &self, at::Scalar other);
|
||||
|
||||
static MLIRTensor div(const MLIRTensor &self, const MLIRTensor &other);
|
||||
|
||||
static MLIRTensor div_(MLIRTensor &self, const MLIRTensor &other);
|
||||
|
||||
static MLIRTensor expand(const MLIRTensor &self, at::IntArrayRef size,
|
||||
bool implicit);
|
||||
|
||||
static MLIRTensor gather(const MLIRTensor &self, int64_t dim,
|
||||
const MLIRTensor &index, bool sparse_grad);
|
||||
|
||||
static MLIRTensor hardtanh(const MLIRTensor &self, at::Scalar min_val,
|
||||
at::Scalar max_val);
|
||||
|
||||
static MLIRTensor hardtanh_(MLIRTensor &self, at::Scalar min_val,
|
||||
at::Scalar max_val);
|
||||
|
||||
static MLIRTensor hardtanh_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self,
|
||||
at::Scalar min_val, at::Scalar max_val);
|
||||
|
||||
static MLIRTensor _log_softmax(const MLIRTensor &input, int64_t dim,
|
||||
bool half_to_float);
|
||||
|
||||
static MLIRTensor _log_softmax_backward_data(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &output,
|
||||
int64_t dim,
|
||||
const MLIRTensor &self);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor>
|
||||
max_pool2d_with_indices(const MLIRTensor &input, at::IntArrayRef kernel_size,
|
||||
at::IntArrayRef stride, at::IntArrayRef padding,
|
||||
at::IntArrayRef dilation, bool ceil_mode);
|
||||
|
||||
static MLIRTensor max_pool2d_with_indices_backward(
|
||||
const MLIRTensor &grad_output, const MLIRTensor &self,
|
||||
at::IntArrayRef kernel_size, at::IntArrayRef stride,
|
||||
at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode,
|
||||
const MLIRTensor &indices);
|
||||
|
||||
static MLIRTensor mean(const MLIRTensor &input,
|
||||
c10::optional<at::ScalarType> dtype);
|
||||
|
||||
static MLIRTensor mean(const MLIRTensor &input, at::IntArrayRef dim,
|
||||
bool keepdim, c10::optional<at::ScalarType> dtype);
|
||||
|
||||
static MLIRTensor mm(const MLIRTensor &input, const MLIRTensor &mat1);
|
||||
|
||||
static MLIRTensor mul(const MLIRTensor &self, const MLIRTensor &other);
|
||||
|
||||
static MLIRTensor mul_(MLIRTensor &self, const MLIRTensor &other);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
|
||||
native_batch_norm(const MLIRTensor &input, const MLIRTensor &weight,
|
||||
const MLIRTensor &bias, const MLIRTensor &running_mean,
|
||||
const MLIRTensor &running_var, bool training,
|
||||
double momentum, double eps);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor, MLIRTensor>
|
||||
native_batch_norm_backward(const MLIRTensor &grad_out,
|
||||
const MLIRTensor &input, const MLIRTensor &weight,
|
||||
const MLIRTensor &running_mean,
|
||||
const MLIRTensor &running_var,
|
||||
const MLIRTensor &save_mean,
|
||||
const MLIRTensor &save_invstd, bool train,
|
||||
double eps, std::array<bool, 3> output_mask);
|
||||
|
||||
static MLIRTensor neg(const MLIRTensor &input);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor>
|
||||
nll_loss2d_forward(const MLIRTensor &self, const MLIRTensor &target,
|
||||
const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index);
|
||||
|
||||
static MLIRTensor nll_loss2d_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self,
|
||||
const MLIRTensor &target,
|
||||
const MLIRTensor &weight,
|
||||
int64_t reduction, int64_t ignore_index,
|
||||
const MLIRTensor &total_weight);
|
||||
|
||||
static std::tuple<MLIRTensor, MLIRTensor>
|
||||
nll_loss_forward(const MLIRTensor &self, const MLIRTensor &target,
|
||||
const MLIRTensor &weight, int64_t reduction,
|
||||
int64_t ignore_index);
|
||||
|
||||
static MLIRTensor nll_loss_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self,
|
||||
const MLIRTensor &target,
|
||||
const MLIRTensor &weight,
|
||||
int64_t reduction, int64_t ignore_index,
|
||||
const MLIRTensor &total_weight);
|
||||
|
||||
static MLIRTensor size(const MLIRTensor &self, int64_t dim);
|
||||
|
||||
static MLIRTensor squeeze(const MLIRTensor &self, int64_t dim);
|
||||
|
||||
static MLIRTensor sub(const MLIRTensor &input, const MLIRTensor &other,
|
||||
at::Scalar alpha);
|
||||
|
||||
static MLIRTensor sub_(MLIRTensor &input, const MLIRTensor &other,
|
||||
at::Scalar alpha);
|
||||
|
||||
static MLIRTensor sum(const MLIRTensor &self, at::IntArrayRef dim,
|
||||
bool keepdim, c10::optional<at::ScalarType> dtype);
|
||||
|
||||
static MLIRTensor relu(const MLIRTensor &input);
|
||||
|
||||
static MLIRTensor relu_(MLIRTensor &input);
|
||||
|
||||
static MLIRTensor t(const MLIRTensor &input);
|
||||
|
||||
static MLIRTensor threshold_backward(const MLIRTensor &grad_output,
|
||||
const MLIRTensor &self,
|
||||
at::Scalar threshold);
|
||||
|
||||
static MLIRTensor to(MLIRTensor &input, c10::optional<Device> device,
|
||||
c10::optional<at::ScalarType> scalar_type);
|
||||
|
||||
static MLIRTensor unsqueeze(const MLIRTensor &self, int64_t dim);
|
||||
|
||||
static MLIRTensor view(const MLIRTensor &input, at::IntArrayRef size);
|
||||
};
|
||||
} // namespace torch_mlir
|
|
@ -1,156 +0,0 @@
|
|||
//===- tensor_impl.cpp ------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "tensor_impl.h"
|
||||
#include "aten_mlir_bridge.h"
|
||||
|
||||
#include <c10/core/impl/DeviceGuardImplInterface.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
namespace torch_mlir {
|
||||
namespace {
|
||||
|
||||
thread_local c10::Device g_current_device(at::DeviceType::XLA, 0);
|
||||
|
||||
struct MLIRGuardImpl : public c10::impl::DeviceGuardImplInterface {
|
||||
at::DeviceType type() const override { return at::DeviceType::XLA; }
|
||||
|
||||
c10::Device exchangeDevice(c10::Device device) const override {
|
||||
std::swap(g_current_device, device);
|
||||
return device;
|
||||
}
|
||||
|
||||
c10::Device getDevice() const override { return g_current_device; }
|
||||
|
||||
void setDevice(c10::Device device) const override {
|
||||
g_current_device = device;
|
||||
}
|
||||
|
||||
void uncheckedSetDevice(c10::Device device) const noexcept override {
|
||||
g_current_device = device;
|
||||
}
|
||||
|
||||
c10::Stream getStream(c10::Device device) const noexcept override {
|
||||
return c10::Stream(c10::Stream::DEFAULT, device);
|
||||
}
|
||||
|
||||
c10::Stream exchangeStream(c10::Stream s) const noexcept override {
|
||||
return c10::Stream(c10::Stream::DEFAULT, g_current_device);
|
||||
}
|
||||
|
||||
c10::DeviceIndex deviceCount() const noexcept override { return 0; }
|
||||
};
|
||||
|
||||
C10_REGISTER_GUARD_IMPL(XLA, MLIRGuardImpl);
|
||||
|
||||
} // namespace
|
||||
|
||||
MLIRTensorImpl::MLIRTensorImpl(MLIRTensor tensor)
|
||||
: c10::TensorImpl(c10::XLATensorId(), GetTypeMeta(tensor),
|
||||
bridge::MLIRDeviceToAtenDevice(tensor.GetDevice())),
|
||||
tensor_(std::move(tensor)) {}
|
||||
|
||||
c10::intrusive_ptr<c10::TensorImpl> MLIRTensorImpl::shallow_copy_and_detach(
|
||||
const c10::VariableVersion &version_counter,
|
||||
bool allow_tensor_metadata_change) const {
|
||||
// std::cout << "MLIRTensorImpl::" << __func__ << std::endl;
|
||||
auto impl = c10::make_intrusive<MLIRTensorImpl>(tensor_);
|
||||
copy_tensor_metadata(
|
||||
/*src_impl=*/this,
|
||||
/*dest_impl=*/impl.get(),
|
||||
/*version_counter=*/version_counter,
|
||||
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
|
||||
return impl;
|
||||
}
|
||||
|
||||
void MLIRTensorImpl::shallow_copy_from(
|
||||
const c10::intrusive_ptr<TensorImpl> &impl) {
|
||||
// std::cout << "MLIRTensorImpl::" << __func__ << std::endl;
|
||||
MLIRTensorImpl *tensor_impl = dynamic_cast<MLIRTensorImpl *>(impl.get());
|
||||
copy_tensor_metadata(
|
||||
/*src_impl=*/tensor_impl,
|
||||
/*dest_impl=*/this,
|
||||
/*version_counter=*/version_counter(),
|
||||
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
|
||||
tensor_impl->tensor_.ShallowCopyTo(&tensor_);
|
||||
generation_ = 0;
|
||||
}
|
||||
|
||||
at::IntArrayRef MLIRTensorImpl::sizes() const {
|
||||
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
|
||||
return c10::TensorImpl::sizes();
|
||||
}
|
||||
|
||||
at::IntArrayRef MLIRTensorImpl::strides() const {
|
||||
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
|
||||
return c10::TensorImpl::strides();
|
||||
}
|
||||
|
||||
int64_t MLIRTensorImpl::dim() const {
|
||||
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
|
||||
return c10::TensorImpl::dim();
|
||||
}
|
||||
|
||||
int64_t MLIRTensorImpl::numel() const {
|
||||
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
|
||||
return c10::TensorImpl::numel();
|
||||
}
|
||||
|
||||
bool MLIRTensorImpl::is_contiguous(at::MemoryFormat memory_format) const {
|
||||
// Only check that the storage is already contiguous.
|
||||
assert(is_contiguous_ && "Non-contiguous storage for MLIR tensor");
|
||||
return true;
|
||||
}
|
||||
|
||||
int64_t MLIRTensorImpl::size(int64_t d) const {
|
||||
const_cast<MLIRTensorImpl *>(this)->SetupSizeProperties();
|
||||
return c10::TensorImpl::size(d);
|
||||
}
|
||||
|
||||
void MLIRTensorImpl::SetupSizeProperties() {
|
||||
size_t generation = tensor_.generation();
|
||||
if (generation != generation_) {
|
||||
// Fill up the basic dimension data members which the base class
|
||||
// implementation uses in its APIs.
|
||||
auto sizes = tensor_.sizes();
|
||||
auto strides = tensor_.strides();
|
||||
|
||||
strides_.clear();
|
||||
sizes_.clear();
|
||||
numel_ = 1;
|
||||
|
||||
for (auto t : llvm::zip(sizes, strides)) {
|
||||
auto size = std::get<0>(t);
|
||||
sizes_.push_back(size);
|
||||
strides_.push_back(std::get<1>(t));
|
||||
numel_ *= size;
|
||||
}
|
||||
|
||||
generation_ = generation;
|
||||
}
|
||||
}
|
||||
|
||||
caffe2::TypeMeta MLIRTensorImpl::GetTypeMeta(const MLIRTensor &tensor) {
|
||||
return c10::scalarTypeToTypeMeta(tensor.dtype());
|
||||
}
|
||||
|
||||
c10::Device MLIRTensorImpl::GetCurrentAtenDevice() { return g_current_device; }
|
||||
|
||||
c10::Device MLIRTensorImpl::SetCurrentAtenDevice(c10::Device device) {
|
||||
std::swap(g_current_device, device);
|
||||
return device;
|
||||
}
|
||||
|
||||
void MLIRTensorImpl::AtenInitialize() {}
|
||||
|
||||
const at::Storage &MLIRTensorImpl::storage() const {
|
||||
assert(0 && "MLIR tensors do not have storage");
|
||||
}
|
||||
|
||||
bool MLIRTensorImpl::has_storage() const { return false; }
|
||||
|
||||
} // namespace torch_mlir
|
|
@ -1,60 +0,0 @@
//===- tensor_impl.h --------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

#pragma once

#include "tensor.h"

#include <ATen/Tensor.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorImpl.h>

namespace torch_mlir {

class MLIRTensorImpl : public c10::TensorImpl {
public:
  explicit MLIRTensorImpl(MLIRTensor tensor);

  MLIRTensor &tensor() { return tensor_; }

  c10::intrusive_ptr<TensorImpl>
  shallow_copy_and_detach(const c10::VariableVersion &version_counter,
                          bool allow_tensor_metadata_change) const override;

  void shallow_copy_from(const c10::intrusive_ptr<TensorImpl> &impl) override;

  at::IntArrayRef sizes() const override;

  at::IntArrayRef strides() const override;

  int64_t dim() const override;

  int64_t numel() const override;

  bool is_contiguous(at::MemoryFormat memory_format) const override;

  int64_t size(int64_t d) const override;

  static c10::Device GetCurrentAtenDevice();

  static c10::Device SetCurrentAtenDevice(c10::Device device);

  static void AtenInitialize();

  const at::Storage &storage() const override;

  bool has_storage() const override;

private:
  static caffe2::TypeMeta GetTypeMeta(const MLIRTensor &tensor);

  void SetupSizeProperties();

  MLIRTensor tensor_;
  size_t generation_ = 0;
};
} // namespace torch_mlir
@ -1,44 +0,0 @@
//===- torch_util.cpp -------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

#include "torch_util.h"

#include <ATen/Functions.h>
#include <ATen/Tensor.h>

namespace torch_mlir {
namespace util {

at::Tensor Zeros(at::IntArrayRef sizes, at::ScalarType type) {
  return at::zeros(sizes, type);
}

at::Tensor CopyTensor(const at::Tensor &ref) {
  return ref.to(ref.options(), /*non_blocking=*/false, /*copy=*/true);
}

// Same as above, with an additional cast.
at::Tensor CopyTensor(const at::Tensor &ref, at::ScalarType dest_type) {
  return ref.to(ref.options().dtype(dest_type), /*non_blocking=*/false,
                /*copy=*/true);
}

at::ScalarType GetScalarType(at::Scalar scalar) {
  if (scalar.isFloatingPoint()) {
    return at::kDouble;
  } else if (scalar.isIntegral(/*includeBool=*/false)) {
    return at::kLong;
  } else if (scalar.isBoolean()) {
    return at::kBool;
  } else if (scalar.isComplex()) {
    return at::kComplexDouble;
  }
  assert(0 && "Unknown type for scalar");
}

} // namespace util
} // namespace torch_mlir
@ -1,34 +0,0 @@
//===- torch_util.h ---------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//

#pragma once

#include <ATen/Tensor.h>
#include <c10/core/ScalarType.h>
#include <c10/util/Optional.h>

namespace torch_mlir {
namespace util {

at::Tensor Zeros(at::IntArrayRef sizes, at::ScalarType type);

// Makes a deep copy of an ATen tensor.
at::Tensor CopyTensor(const at::Tensor &ref);

// Same as above, with an additional cast.
at::Tensor CopyTensor(const at::Tensor &ref, at::ScalarType dest_type);

// Returns the at::ScalarType corresponding to an at::Scalar.
at::ScalarType GetScalarType(at::Scalar scalar);

template <typename T, typename S>
T OptionalOr(const c10::optional<S> &value, T defval) {
  return value ? static_cast<T>(*value) : defval;
}

} // namespace util
} // namespace torch_mlir
@ -1,10 +0,0 @@
include_directories(
  ${TORCH_INCLUDE_DIRS}
)
add_library(aten_ops SHARED
  aten_ops.cpp
)

target_link_libraries(aten_ops
  ${TORCH_LIBRARIES}
)
@ -1,772 +0,0 @@
|
|||
//===- aten_ops.cpp ---------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// This file is licensed under a pytorch-style license
|
||||
// See frontends/pytorch/LICENSE for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// This file implements a C library that is targeted by MLIR code generation
|
||||
// from the ATen dialect. This library is intended to support a functional
|
||||
// proof of concept rather than being optimized for high performance. Most of the
|
||||
// functions are implemented by calling back into the torch libraries.
|
||||
|
||||
#include <assert.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#include <torch/torch.h>
|
||||
|
||||
#include <nnpack.h>
|
||||
#include <ATen/CPUType.h>
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T, int N> struct tensor_t {
|
||||
T *d;
|
||||
T *aligned;
|
||||
size_t offset;
|
||||
size_t shape[N];
|
||||
size_t stride[N];
|
||||
|
||||
size_t index(size_t n, size_t channel, size_t row, size_t col) const {
|
||||
size_t channels = shape[1];
|
||||
size_t height = shape[2];
|
||||
size_t width = shape[3];
|
||||
return n * height * width * channels + channel * height * width +
|
||||
row * width + col;
|
||||
}
|
||||
|
||||
tensor_t() {
|
||||
d = aligned = nullptr;
|
||||
offset = 0;
|
||||
for (int i = 0; i < N; i++)
|
||||
shape[i] = stride[i] = 0;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int N>
|
||||
std::vector<int64_t> translate_shape(tensor_t<T, N> *t) {
|
||||
std::vector<int64_t> shape;
|
||||
for (int i = 0; i < N; i++) {
|
||||
shape.push_back(t->shape[i]);
|
||||
// std::cout << i << " shape " << t->shape[i] << std::endl;
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
std::vector<int64_t> translate_stride(tensor_t<T, N> *t) {
|
||||
std::vector<int64_t> stride;
|
||||
for (int i = 0; i < N; i++) {
|
||||
stride.push_back(t->stride[i]);
|
||||
// std::cout << i << " stride " << t->stride[i] << std::endl;
|
||||
}
|
||||
return stride;
|
||||
}
|
||||
|
||||
template <int N> void dumpTensor(std::ostream &o, tensor_t<float, N> *t) {
|
||||
o << "Shape:";
|
||||
for (int i = 0; i < N; i++)
|
||||
o << t->shape[i] << " ";
|
||||
o << "Stride:";
|
||||
for (int i = 0; i < N; i++)
|
||||
o << t->stride[i] << " ";
|
||||
o << "\n";
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
at::Tensor to_torch(tensor_t<T, N> *t,
|
||||
const at::TensorOptions &options = at::TensorOptions()) {
|
||||
// std::cout << "to_torch\n";
|
||||
return torch::from_blob((void *)t->d, translate_shape(t), translate_stride(t),
|
||||
options);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r);
|
||||
|
||||
template <typename T, int N>
|
||||
void add_out(tensor_t<T, N> *a, tensor_t<T, N> *b, T alpha, tensor_t<T, N> *r) {
|
||||
at::Tensor torch_a = to_torch(a);
|
||||
at::Tensor torch_b = to_torch(b);
|
||||
at::Tensor result = at::native::add(torch_a, torch_b, alpha).clone();
|
||||
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void addmm_out(tensor_t<T, 1> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *c,
|
||||
int32_t alpha, int32_t beta, tensor_t<T, 2> *r) {
|
||||
at::Tensor torch_a = to_torch(a);
|
||||
at::Tensor torch_b = to_torch(b);
|
||||
at::Tensor torch_c = to_torch(c);
|
||||
at::Tensor result =
|
||||
at::native::addmm(torch_a, torch_b, torch_c, alpha, beta).clone();
|
||||
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N, int M>
|
||||
void as_strided_out(tensor_t<float, M> *a,
|
||||
/*size*/ int32_t sz0, int32_t sz1, int32_t sz2, int32_t sz3,
|
||||
/*stride*/ int32_t sd0, int32_t sd1, int32_t sd2,
|
||||
int32_t sd3, int32_t offset, tensor_t<T, N> *r) {
|
||||
at::Tensor input = to_torch(a);
|
||||
|
||||
std::vector<int64_t> size;
|
||||
std::vector<int64_t> stride;
|
||||
c10::optional<int64_t> storage_offset;
|
||||
|
||||
if (offset != 0)
|
||||
storage_offset = offset;
|
||||
if (N > 0) {
|
||||
size.push_back(sz0);
|
||||
stride.push_back(sd0);
|
||||
}
|
||||
if (N > 1) {
|
||||
size.push_back(sz1);
|
||||
stride.push_back(sd1);
|
||||
}
|
||||
if (N > 2) {
|
||||
size.push_back(sz2);
|
||||
stride.push_back(sd2);
|
||||
}
|
||||
if (N > 3) {
|
||||
size.push_back(sz3);
|
||||
stride.push_back(sd3);
|
||||
}
|
||||
|
||||
std::vector<int64_t> sizeRef{size};
|
||||
std::vector<int64_t> strideRef{stride};
|
||||
|
||||
// for (int i = 0; i<N; i++)
|
||||
// std::cout << "STRIDE " << i << " " << stride[i] << std::endl;
|
||||
at::Tensor result =
|
||||
at::native::as_strided_tensorimpl(input, size, stride, storage_offset)
|
||||
.clone();
|
||||
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
// FIXME: stride, padding, dilation, output_padding should be IntArrayRef
|
||||
template <typename T>
|
||||
void conv2d_out(tensor_t<T, 4> *t, tensor_t<T, 4> *weight, tensor_t<T, 1> *bias,
|
||||
int32_t stride, int32_t pad, int32_t dilation,
|
||||
tensor_t<T, 4> *r) {
|
||||
at::Tensor torch_t = to_torch(t);
|
||||
at::Tensor torch_w = to_torch(weight);
|
||||
at::Tensor torch_b = to_torch(bias);
|
||||
int64_t groups = 1;
|
||||
|
||||
at::Tensor result = at::native::conv2d(torch_t, torch_w, torch_b, stride, pad,
|
||||
dilation, groups)
|
||||
.clone();
|
||||
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void conv2d_backward_out(tensor_t<T, 4> *grad_output, tensor_t<T, 4> *input,
|
||||
tensor_t<T, 4> *weight, int32_t stride, int32_t pad,
|
||||
int32_t dilation, tensor_t<T, 4> *r0,
|
||||
tensor_t<T, 4> *r1, tensor_t<T, 1> *r2) {
|
||||
const at::Tensor &arg_grad = to_torch(grad_output);
|
||||
const at::Tensor &arg_input = to_torch(input);
|
||||
const at::Tensor &arg_weight = to_torch(weight);
|
||||
|
||||
std::vector<int64_t> p{pad, pad};
|
||||
std::vector<int64_t> s{stride, stride};
|
||||
std::vector<int64_t> d{dilation, dilation};
|
||||
|
||||
std::array<bool, 3> output_mask{true, true, true};
|
||||
|
||||
std::tuple<at::Tensor, at::Tensor, at::Tensor> grads =
|
||||
at::native::mkldnn_convolution_backward(arg_input, arg_grad, arg_weight,
|
||||
p, s, d, 1, output_mask);
|
||||
|
||||
auto result0 = std::get<0>(grads);
|
||||
auto result1 = std::get<1>(grads);
|
||||
auto result2 = std::get<2>(grads);
|
||||
|
||||
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
|
||||
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
|
||||
memcpy(r2->d, result2.data_ptr(), result2.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void log_softmax_out(tensor_t<T, N> *t, int32_t dim, bool half_to_float,
|
||||
tensor_t<T, N> *r) {
|
||||
at::Tensor input = to_torch(t);
|
||||
at::Tensor result = at::native::log_softmax_cpu(input, dim, half_to_float);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void log_softmax_backward_data_out(tensor_t<T, N> *a, tensor_t<T, N> *b,
|
||||
int32_t c, tensor_t<T, N> *d,
|
||||
tensor_t<T, N> *r) {
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
at::Tensor inputD = to_torch(d);
|
||||
|
||||
at::Tensor result =
|
||||
at::native::log_softmax_backward_cpu(inputA, inputB, c, inputD);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void max_pool2d_with_indices_out(tensor_t<T, 4> *t, int32_t c, int32_t d,
|
||||
int32_t e, int32_t f, bool ceil_mode,
|
||||
tensor_t<T, 4> *r0, tensor_t<int64_t, 4> *r1) {
|
||||
at::Tensor input = to_torch(t);
|
||||
|
||||
std::vector<int64_t> kernel{c, c};
|
||||
std::vector<int64_t> stride{d, d};
|
||||
std::vector<int64_t> padding{e, e};
|
||||
std::vector<int64_t> dilation{f, f};
|
||||
|
||||
auto result = at::native::max_pool2d_with_indices_cpu(
|
||||
input, kernel, stride, padding, dilation, ceil_mode);
|
||||
at::Tensor outTensor = std::get<0>(result);
|
||||
at::Tensor idxTensor = std::get<1>(result);
|
||||
memcpy(r0->d, outTensor.data_ptr(), outTensor.numel() * sizeof(T));
|
||||
memcpy(r1->d, idxTensor.data_ptr(), idxTensor.numel() * sizeof(int64_t));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void max_pool2d_with_indices_backward_out(tensor_t<T, 4> *a, tensor_t<T, 4> *b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
int32_t f, bool g,
|
||||
tensor_t<int64_t, 4> *h,
|
||||
tensor_t<T, 4> *r) {
|
||||
const at::Tensor &inputA = to_torch(a);
|
||||
const at::Tensor &inputB = to_torch(b);
|
||||
at::TensorOptions options(at::ScalarType::Long);
|
||||
const at::Tensor &inputH = to_torch(h, options);
|
||||
|
||||
std::vector<int64_t> kernel{c, c};
|
||||
std::vector<int64_t> stride{d, d};
|
||||
std::vector<int64_t> padding{e, e};
|
||||
std::vector<int64_t> dilation{f, f};
|
||||
|
||||
at::Tensor result = at::native::max_pool2d_with_indices_backward_cpu(
|
||||
inputA, inputB, kernel, stride, padding, dilation, g, inputH);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r) {
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
|
||||
at::Tensor result = inputA.matmul(inputB);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void mul_out(tensor_t<T, N> *a, tensor_t<T, N> *b, tensor_t<T, N> *r) {
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
|
||||
at::Tensor result = at::native::mul(inputA, inputB);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void relu_out(tensor_t<T, N> *a, tensor_t<T, N> *r) {
|
||||
at::Tensor inputA = to_torch(a);
|
||||
|
||||
at::Tensor result = at::native::relu(inputA);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T> void t_out(tensor_t<T, 2> *a, tensor_t<T, 2> *r) {
|
||||
size_t h = a->shape[0];
|
||||
size_t w = a->shape[1];
|
||||
|
||||
for (size_t i = 0; i < h; i++)
|
||||
for (size_t j = 0; j < w; j++)
|
||||
r->d[j * h + i] = a->d[i * w + j];
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
void threshold_backward_out(tensor_t<T, N> *a, tensor_t<T, N> *b, int32_t c,
|
||||
tensor_t<T, N> *r) {
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
|
||||
at::Tensor result = at::native::threshold_backward(inputA, inputB, c);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T, int N, int M>
|
||||
void view_out(tensor_t<T, M> *a, int32_t b, int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<T, N> *r) {
|
||||
tensor_t<T, N> result;
|
||||
size_t numel = 1;
|
||||
for (size_t d = 0; d < M; d++)
|
||||
numel *= a->shape[d];
|
||||
|
||||
if (N == 1)
|
||||
c = d = e = 1;
|
||||
if (N == 2)
|
||||
d = e = 1;
|
||||
if (N == 3)
|
||||
e = 1;
|
||||
|
||||
int inferred = 0;
|
||||
if (b == -1)
|
||||
inferred++;
|
||||
if (c == -1)
|
||||
inferred++;
|
||||
if (d == -1)
|
||||
inferred++;
|
||||
if (e == -1)
|
||||
inferred++;
|
||||
assert(inferred <= 1 &&
|
||||
"aten.view Error: only one dimension can be inferred");
|
||||
|
||||
if (b == -1)
|
||||
b = numel / (c * d * e);
|
||||
if (c == -1)
|
||||
c = numel / (b * d * e);
|
||||
if (d == -1)
|
||||
d = numel / (b * c * e);
|
||||
if (e == -1)
|
||||
e = numel / (b * c * d);
|
||||
|
||||
if (N > 0)
|
||||
r->shape[0] = b;
|
||||
if (N > 1)
|
||||
r->shape[1] = c;
|
||||
if (N > 2)
|
||||
r->shape[2] = d;
|
||||
if (N > 3)
|
||||
r->shape[3] = e;
|
||||
|
||||
memcpy(r->d, a->d, numel * sizeof(T));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
|
||||
// add_out
|
||||
|
||||
void _mlir_ciface_add_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
|
||||
tensor_t<float, 1> *b, int32_t i,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
add_out<float, 1>(a, b, i, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_add_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *b, int32_t i,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
add_out<float, 2>(a, b, i, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_add_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
|
||||
tensor_t<float, 3> *b, int32_t i,
|
||||
tensor_t<float, 3> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
add_out<float, 3>(a, b, i, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_add_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
|
||||
tensor_t<float, 4> *b, int32_t i,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
add_out<float, 4>(a, b, i, r);
|
||||
}
|
||||
|
||||
// addmm_out
|
||||
|
||||
void _mlir_ciface_addmm_2F32_1F32_2F32_2F32_out(tensor_t<float, 1> *a,
|
||||
tensor_t<float, 2> *b,
|
||||
tensor_t<float, 2> *c,
|
||||
int32_t alpha, int32_t beta,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
addmm_out<float>(a, b, c, alpha, beta, r);
|
||||
}
|
||||
|
||||
// as_strided_out
|
||||
|
||||
void _mlir_ciface_as_strided_1F32_1F32_out(tensor_t<float, 1> *a,
|
||||
/*size*/ int32_t sz0, int32_t sz1,
|
||||
int32_t sz2, int32_t sz3,
|
||||
/*stride*/ int32_t sd0, int32_t sd1,
|
||||
int32_t sd2, int32_t sd3,
|
||||
int32_t offset,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
as_strided_out<float, 1, 1>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
|
||||
r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_as_strided_4F32_2F32_out(tensor_t<float, 2> *a,
|
||||
/*size*/ int32_t sz0, int32_t sz1,
|
||||
int32_t sz2, int32_t sz3,
|
||||
/*stride*/ int32_t sd0, int32_t sd1,
|
||||
int32_t sd2, int32_t sd3,
|
||||
int32_t offset,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
// std::cout << sz0 << " "
|
||||
// << sz1 << " "
|
||||
// << sz2 << " "
|
||||
// << sz3 << "\n";
|
||||
// std::cout << sd0 << " "
|
||||
// << sd1 << " "
|
||||
// << sd2 << " "
|
||||
// << sd3 << "\n";
|
||||
as_strided_out<float, 4, 2>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
|
||||
r);
|
||||
}
|
||||
|
||||
// conv2d_out
|
||||
|
||||
void _mlir_ciface_conv2d_4F32_4F32_4F32_1F32_out(
|
||||
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
|
||||
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_conv2d_relu_4F32_4F32_4F32_1F32_out(
|
||||
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
|
||||
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
|
||||
relu_out<float, 4>(r, r);
|
||||
}
|
||||
|
||||
// conv2d_backward_out
|
||||
|
||||
void _mlir_ciface_conv2d_backward_4F32_4F32_1F32_4F32_4F32_4F32_out(
|
||||
tensor_t<float, 4> *grad_output, tensor_t<float, 4> *t,
|
||||
tensor_t<float, 4> *weight, int32_t stride, int32_t padding,
|
||||
int32_t dilation, tensor_t<float, 4> *r0, tensor_t<float, 4> *r1,
|
||||
tensor_t<float, 1> *r2) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
conv2d_backward_out<float>(grad_output, t, weight, stride, padding, dilation,
|
||||
r0, r1, r2);
|
||||
}
|
||||
|
||||
// div
|
||||
float *div_0F32_0F32_0F32(float *a, float *b) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
float *ret = (float *)malloc(sizeof(float));
|
||||
*ret = *a / *b;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// log_softmax_out
|
||||
|
||||
void _mlir_ciface_log_softmax_1F32_1F32_out(tensor_t<float, 1> *t, int32_t dim,
|
||||
bool half_to_float,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_out<float, 1>(t, dim, half_to_float, r);
|
||||
}
|
||||
void _mlir_ciface_log_softmax_2F32_2F32_out(tensor_t<float, 2> *t, int32_t dim,
|
||||
bool half_to_float,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_out<float, 2>(t, dim, half_to_float, r);
|
||||
}
|
||||
void _mlir_ciface_log_softmax_3F32_3F32_out(tensor_t<float, 3> *t, int32_t dim,
|
||||
bool half_to_float,
|
||||
tensor_t<float, 3> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_out<float, 3>(t, dim, half_to_float, r);
|
||||
}
|
||||
void _mlir_ciface_log_softmax_4F32_4F32_out(tensor_t<float, 4> *t, int32_t dim,
|
||||
bool half_to_float,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_out<float, 4>(t, dim, half_to_float, r);
|
||||
}
|
||||
|
||||
// log_softmax_backward_data_out
|
||||
|
||||
void _mlir_ciface_log_softmax_backward_data_2F32_2F32_2F32_2F32_out(
|
||||
tensor_t<float, 2> *a, tensor_t<float, 2> *b, int32_t c,
|
||||
tensor_t<float, 2> *d, tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_backward_data_out<float, 2>(a, b, c, d, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_log_softmax_backward_data_4F32_4F32_4F32_4F32_out(
|
||||
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c,
|
||||
tensor_t<float, 4> *d, tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
log_softmax_backward_data_out<float, 4>(a, b, c, d, r);
|
||||
}
|
||||
|
||||
// max_pool2d_out
|
||||
|
||||
void _mlir_ciface_max_pool2d_with_indices_4F32_4I64_4F32_out(
|
||||
tensor_t<float, 4> *t, int32_t kernel, int32_t pad, int32_t stride,
|
||||
int32_t dilation, bool ceil_mode, tensor_t<float, 4> *r0,
|
||||
tensor_t<int64_t, 4> *r1) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
max_pool2d_with_indices_out<float>(t, kernel, pad, stride, dilation,
|
||||
ceil_mode, r0, r1);
|
||||
}
|
||||
|
||||
// max_pool2d backward_out
|
||||
|
||||
void _mlir_ciface_max_pool2d_with_indices_backward_4F32_4F32_4F32_4I64_out(
|
||||
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c, int32_t d,
|
||||
int32_t e, int32_t f, bool g, tensor_t<int64_t, 4> *h,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
max_pool2d_with_indices_backward_out<float>(a, b, c, d, e, f, g, h, r);
|
||||
}
|
||||
|
||||
// mm_out
|
||||
|
||||
void _mlir_ciface_mm_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *b,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
mm_out<float>(a, b, r);
|
||||
}
|
||||
|
||||
// mul_out
|
||||
|
||||
void _mlir_ciface_mul_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
|
||||
tensor_t<float, 1> *b,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
mul_out<float, 1>(a, b, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_mul_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *b,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
mul_out<float, 2>(a, b, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_mul_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
|
||||
tensor_t<float, 3> *b,
|
||||
tensor_t<float, 3> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
mul_out<float, 3>(a, b, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_mul_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
|
||||
tensor_t<float, 4> *b,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
mul_out<float, 4>(a, b, r);
|
||||
}
|
||||
|
||||
// nll_loss2d_forward_out
|
||||
|
||||
void _mlir_ciface_nll_loss2d_forward_1F32_1F32_4F32_3I64_1F32_out(
|
||||
tensor_t<float, 4> *a, tensor_t<uint64_t, 3> *b, tensor_t<float, 1> *c,
|
||||
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
using T = float;
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::TensorOptions options(at::ScalarType::Long);
|
||||
at::Tensor inputB = to_torch(b, options);
|
||||
at::Tensor inputC = to_torch(c);
|
||||
|
||||
std::tuple<at::Tensor, at::Tensor> result =
|
||||
at::CPUType::nll_loss2d_forward(inputA, inputB, inputC, d, e);
|
||||
|
||||
at::Tensor result0 = std::get<0>(result);
|
||||
at::Tensor result1 = std::get<1>(result);
|
||||
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
|
||||
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
// nll_loss2d_backward_out
|
||||
|
||||
void _mlir_ciface_nll_loss2d_backward_4F32_1F32_4F32_3I64_1F32_1F32_out(
|
||||
tensor_t<float, 1> *a, tensor_t<float, 4> *b, tensor_t<uint64_t, 3> *c,
|
||||
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
using T = float;
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
at::TensorOptions options(at::ScalarType::Long);
|
||||
at::Tensor inputC = to_torch(c, options);
|
||||
at::Tensor inputD = to_torch(d);
|
||||
at::Tensor inputG = to_torch(g);
|
||||
|
||||
at::Tensor result = at::CPUType::nll_loss2d_backward(inputA, inputB, inputC,
|
||||
inputD, e, f, inputG);
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
void _mlir_ciface_nll_loss_backward_2F32_1F32_2F32_1I64_1F32_1F32_out(
|
||||
tensor_t<float, 1> *a, tensor_t<float, 2> *b, tensor_t<uint64_t, 1> *c,
|
||||
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
using T = float;
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::Tensor inputB = to_torch(b);
|
||||
at::TensorOptions options(at::ScalarType::Long);
|
||||
at::Tensor inputC = to_torch(c, options);
|
||||
at::Tensor inputD = to_torch(d);
|
||||
at::Tensor inputG = to_torch(g);
|
||||
|
||||
at::Tensor result = at::CPUType::nll_loss_backward(inputA, inputB, inputC,
|
||||
inputD, e, f, inputG);
|
||||
|
||||
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
// nll_loss_forward_out
|
||||
|
||||
void _mlir_ciface_nll_loss_forward_1F32_1F32_2F32_1I64_1F32_out(
|
||||
tensor_t<float, 2> *a, tensor_t<uint64_t, 1> *b, tensor_t<float, 1> *c,
|
||||
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
using T = float;
|
||||
at::Tensor inputA = to_torch(a);
|
||||
at::TensorOptions options(at::ScalarType::Long);
|
||||
at::Tensor inputB = to_torch(b, options);
|
||||
at::Tensor inputC = to_torch(c);
|
||||
|
||||
std::tuple<at::Tensor, at::Tensor> result =
|
||||
at::CPUType::nll_loss_forward(inputA, inputB, inputC, d, e);
|
||||
|
||||
at::Tensor result0 = std::get<0>(result);
|
||||
at::Tensor result1 = std::get<1>(result);
|
||||
|
||||
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
|
||||
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
|
||||
}
|
||||
|
||||
// relu_out
|
||||
|
||||
void _mlir_ciface_relu_1F32_1F32_out(tensor_t<float, 1> *a,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
relu_out<float, 1>(a, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_relu_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
relu_out<float, 2>(a, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_relu_3F32_3F32_out(tensor_t<float, 3> *a,
|
||||
tensor_t<float, 3> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
relu_out<float, 3>(a, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_relu_4F32_4F32_out(tensor_t<float, 4> *a,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
relu_out<float, 4>(a, r);
|
||||
}
|
||||
|
||||
// t_out
|
||||
|
||||
void _mlir_ciface_t_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
t_out<float>(a, r);
|
||||
}
|
||||
|
||||
// threshold_backward_out
|
||||
|
||||
void _mlir_ciface_threshold_backward_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
|
||||
tensor_t<float, 1> *b,
|
||||
int32_t c,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
threshold_backward_out<float, 1>(a, b, c, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_threshold_backward_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
|
||||
tensor_t<float, 2> *b,
|
||||
int32_t c,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
threshold_backward_out<float, 2>(a, b, c, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_threshold_backward_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
|
||||
tensor_t<float, 3> *b,
|
||||
int32_t c,
|
||||
tensor_t<float, 3> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
threshold_backward_out<float, 3>(a, b, c, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_threshold_backward_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
|
||||
tensor_t<float, 4> *b,
|
||||
int32_t c,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
threshold_backward_out<float, 4>(a, b, c, r);
|
||||
}
|
||||
|
||||
// view_out
|
||||
|
||||
void _mlir_ciface_view_1F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 1, 4>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_1F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 1, 3>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_1F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 1> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 1, 2>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_2F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 2> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 2, 4>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_4F32_1F32_out(tensor_t<float, 1> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 4, 1>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_4F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 4, 2>(a, b, c, d, e, r);
|
||||
}
|
||||
|
||||
void _mlir_ciface_view_4F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
|
||||
int32_t c, int32_t d, int32_t e,
|
||||
tensor_t<float, 4> *r) {
|
||||
// std::cout << "aten_ops " << __func__ << "\n";
|
||||
view_out<float, 4, 3>(a, b, c, d, e, r);
|
||||
}
|
||||
}
|
|
@ -1,3 +0,0 @@
# TODO: Enable these tests for the new c10 dispatch code path with pt > 1.3
if config.enable_c10_dispatch:
  config.unsupported = True
@ -1,78 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
|
||||
import inspect
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
class ResA(nn.Module):
|
||||
def __init__(self, channels):
|
||||
C = int(channels)
|
||||
C2 = int(channels/2)
|
||||
super(ResA, self).__init__()
|
||||
self.model = nn.Sequential(# A1
|
||||
nn.BatchNorm2d(C),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(C,C2,1,stride=1,padding=0,dilation=1,groups=1,bias=True),
|
||||
# B1
|
||||
nn.BatchNorm2d(C2),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(C2,C2,3,stride=1,padding=1,dilation=1,groups=1,bias=True),
|
||||
# C1
|
||||
nn.BatchNorm2d(C2),
|
||||
nn.ReLU(),
|
||||
nn.Conv2d(C2,C,1,stride=1,padding=0,dilation=1,groups=1,bias=True))
|
||||
def forward(self, x):
|
||||
res = self.model.forward(x)
|
||||
return x + res
|
||||
|
||||
# Prints `str` prefixed by the current test function name so we can use it in
|
||||
# FileCheck label directives.
|
||||
# This is achieved by inspecting the stack and getting the parent name.
|
||||
def printWithCurrentFunctionName(s):
|
||||
# stack[1] is the caller, i.e. "_test_model"
|
||||
# stack[2] is the caller's caller, e.g. "test_conv_1"
|
||||
print(inspect.stack()[2][3], s)
|
||||
|
||||
class TestMLIRExport(unittest.TestCase):
|
||||
def setUp(self):
|
||||
pass
|
||||
|
||||
def _test_model(self, model, model_args):
|
||||
result = model(model_args)
|
||||
|
||||
mlir = torch_mlir.get_mlir(result)
|
||||
printWithCurrentFunctionName (mlir)
|
||||
return True
|
||||
|
||||
def test_ResA_16(self):
|
||||
dev = torch_mlir.mlir_device()
|
||||
model = ResA(16).to(dev)
|
||||
passed = self._test_model(model, torch.ones((1,16,128,128), device=dev))
|
||||
# CHECK-LABEL: test_ResA_16
|
||||
# CHECK: [[V0:%[a-zA-Z0-9]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"({{.*}}) {layer_name = "L0-native_batch_norm-0"}
|
||||
# CHECK: [[V1:%[a-zA-Z0-9]+]] = "aten.relu"([[V0]]) {layer_name = "L1-relu-0"}
|
||||
# CHECK: [[V2:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V1]], {{.*}}) {layer_name = "L2-convolution_overrideable-0"}
|
||||
# CHECK: [[V3:%[a-zA-Z0-9_]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"([[V2]]{{.*}}) {layer_name = "L3-native_batch_norm-1"}
|
||||
# CHECK: [[V4:%[a-zA-Z0-9]+]] = "aten.relu"([[V3]]) {layer_name = "L4-relu-1"}
|
||||
# CHECK: [[V5:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V4]],{{.*}}) {layer_name = "L5-convolution_overrideable-1"}
|
||||
# CHECK: [[V6:%[a-zA-Z0-9_]+]], %{{.*}}, %{{.*}} = "aten.native_batch_norm"([[V5]],{{.*}}) {layer_name = "L6-native_batch_norm-2"}
|
||||
# CHECK: [[V7:%[a-zA-Z0-9]+]] = "aten.relu"([[V6]]) {layer_name = "L7-relu-2"}
|
||||
# CHECK: [[V8:%[a-zA-Z0-9]+]] = "aten.convolution_overrideable"([[V7]],{{.*}}) {layer_name = "L8-convolution_overrideable-2"}
|
||||
# CHECK: {{.*}} = "aten.add"(%arg0, [[V8]], {{.*}}) {layer_name = "L9-add-0"}
|
||||
self.assertTrue(passed)
|
||||
|
||||
verbose = False
|
||||
if __name__ == '__main__':
|
||||
verbose = True
|
||||
unittest.main()
|
|
@ -1,26 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
t0 = torch.randn((1,2,3,4), device=dev)
|
||||
t1 = torch.randn((1,2,3,4), device=dev)
|
||||
t2 = torch.randn((1,2,3,4), device=dev)
|
||||
|
||||
t3 = t0 + t1 + t2
|
||||
|
||||
#
|
||||
# Generate and check the MLIR for the result tensor
|
||||
#
|
||||
t3_mlir = torch_mlir.get_mlir( t3 )
|
||||
|
||||
# CHECK-LABEL: test_export_add3
|
||||
# CHECK: %1 = "aten.add"(%arg0, %arg1, %0) {layer_name = "L0-add-0"} : (tensor<1x2x3x4xf32>, tensor<1x2x3x4xf32>, i32) -> tensor<1x2x3x4xf32>
|
||||
# CHECK: %2 = "aten.add"(%1, %arg2, %0) {layer_name = "L1-add-1"} : (tensor<1x2x3x4xf32>, tensor<1x2x3x4xf32>, i32) -> tensor<1x2x3x4xf32>
|
||||
print("test_export_add3")
|
||||
print(t3_mlir)
|
|
@ -1,19 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
model = torch.nn.BatchNorm2d(123).to(dev)
|
||||
result = model(torch.ones(42,123,4,5).to(dev))
|
||||
|
||||
# CHECK-LABEL: test_export_batchnorm
|
||||
# CHECK: aten.native_batch_norm
|
||||
mlir = torch_mlir.get_mlir( result )
|
||||
print("test_export_batchnorm")
|
||||
print(mlir)
|
|
@ -1,49 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
N = 3
|
||||
Cin = 16
|
||||
Cout = 4
|
||||
w = 10
|
||||
h = 10
|
||||
|
||||
model = torch.nn.Conv2d(Cin, Cout, (3,3))
|
||||
ref_model = torch.nn.Conv2d(Cin, Cout, (3,3))
|
||||
|
||||
ref_model.weight.data = model.weight.clone()
|
||||
ref_model.bias.data = model.bias.clone()
|
||||
|
||||
model = model.to(dev)
|
||||
|
||||
softmax = torch.nn.LogSoftmax(dim=1)
|
||||
loss = torch.nn.NLLLoss()
|
||||
|
||||
tensor = torch.randn(N, Cin, h, w, device=dev)
|
||||
result = model(tensor)
|
||||
|
||||
# CHECK-LABEL: test_export_conv2d
|
||||
# CHECK: aten.convolution_overrideable
|
||||
print("test_export_conv2d")
|
||||
print(torch_mlir.get_mlir( result ))
|
||||
|
||||
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, Cout)
|
||||
ref_target = target.clone()
|
||||
target = target.to(dev)
|
||||
|
||||
test_loss = loss( softmax(result), target )
|
||||
test_loss.backward()
|
||||
|
||||
# CHECK-LABEL: test_export_conv2d_back
|
||||
# CHECK: aten.convolution_overrideable
|
||||
# CHECK: aten._log_softmax
|
||||
# CHECK: aten.nll_loss2d_forward
|
||||
print("test_export_conv2d_back")
|
||||
print(torch_mlir.get_mlir( test_loss ))
|
|
@ -1,24 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
t0 = torch.randn(4, device=dev)
|
||||
t1 = torch.randn(4, device=dev)
|
||||
t2 = torch.randn(4, device=dev)
|
||||
|
||||
t4 = t0 + t1 + t2
|
||||
t5 = t4 + t1
|
||||
t6 = t5 + t4
|
||||
|
||||
# CHECK-LABEL: test_multi_out
|
||||
# CHECK: return %2, %3, %4 : tensor<4xf32>, tensor<4xf32>, tensor<4xf32>
|
||||
mlir = torch_mlir.get_mlir([t4, t5, t6])
|
||||
print ("test_multi_out")
|
||||
print (mlir)
|
|
@ -1,25 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import torchvision.models as models
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
model = models.resnet18().to(dev)
|
||||
model.training = False
|
||||
|
||||
tensor = torch.randn(32,3,32,32).to(dev)
|
||||
result = model(tensor)
|
||||
|
||||
mlir = torch_mlir.get_mlir( result )
|
||||
|
||||
# for now we just check the output shape
|
||||
# CHECK-LABEL: test_export_resnet18
|
||||
# CHECK: return %{{.*}} : tensor<32x1000xf32>
|
||||
print("test_export_resnet18")
|
||||
print(mlir)
|
|
@ -1,24 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import torchvision.models as models
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
model = models.vgg11_bn().to(dev)
|
||||
model.training = False
|
||||
|
||||
result = model(torch.ones(32,3,32,32).to(dev))
|
||||
|
||||
mlir = torch_mlir.get_mlir( result )
|
||||
|
||||
# for now we just check the output shape
|
||||
# CHECK-LABEL: test_export_vgg11
|
||||
# CHECK: return %{{.*}} : tensor<32x1000xf32>
|
||||
print("test_export_vgg11")
|
||||
print(mlir)
|
|
@ -1,27 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
t0 = torch.randn((4,4), device=dev)
|
||||
t1 = torch.randn((4,4), device=dev)
|
||||
|
||||
t2 = t0 + t1
|
||||
|
||||
#
|
||||
# Check the result tensor against the CPU
|
||||
#
|
||||
t0_cpu = t0.to('cpu')
|
||||
t1_cpu = t1.to('cpu')
|
||||
t2_cpu = t2.to('cpu')
|
||||
|
||||
print (t0_cpu, " +\n", t1_cpu, " =\n", t2_cpu)
|
||||
|
||||
# CHECK: PASS! add2 check
|
||||
test.compare(t2, t0_cpu + t1_cpu, "add2")
|
|
@ -1,29 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
t0 = torch.randn((1,2,3,4), device=dev)
|
||||
t1 = torch.randn((1,2,3,4), device=dev)
|
||||
t2 = torch.randn((1,2,3,4), device=dev)
|
||||
|
||||
t3 = t0 + t1 + t2
|
||||
|
||||
#
|
||||
# Check the result tensor against the CPU
|
||||
#
|
||||
t0_cpu = t0.to('cpu')
|
||||
t1_cpu = t1.to('cpu')
|
||||
t2_cpu = t2.to('cpu')
|
||||
t3_cpu = t3.to('cpu')
|
||||
|
||||
print (t0_cpu, " +\n", t1_cpu, " +\n", t2_cpu, " =\n", t3_cpu)
|
||||
|
||||
# CHECK: PASS!
|
||||
test.compare(t3, t0_cpu + t1_cpu + t2_cpu, "add3")
|
|
@ -1,42 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
t0 = torch.randn((4,16,4), device=dev)
|
||||
t1 = torch.randn((4,16,4), device=dev)
|
||||
|
||||
t3 = torch.randn((4,64), device=dev)
|
||||
t4 = torch.randn((4,64), device=dev)
|
||||
|
||||
t2 = t0 + t1
|
||||
t5 = t3 + t4
|
||||
|
||||
t6 = t5.view((4,4,4,4))
|
||||
t7 = t2.view((4,4,4,4))
|
||||
|
||||
t8 = t6 + t7
|
||||
|
||||
t0_cpu = t0.to('cpu')
|
||||
t1_cpu = t1.to('cpu')
|
||||
|
||||
# CHECK: PASS! add_views_0 check
|
||||
test.compare(t2, t0_cpu + t1_cpu, "add_views_0")
|
||||
|
||||
t3_cpu = t3.to('cpu')
|
||||
t4_cpu = t4.to('cpu')
|
||||
|
||||
# CHECK: PASS! add_views_1 check
|
||||
test.compare(t5, t3_cpu + t4_cpu, "add_views_1")
|
||||
|
||||
t6_cpu = t6.to('cpu')
|
||||
t7_cpu = t7.to('cpu')
|
||||
|
||||
# CHECK: PASS! add_views_2 check
|
||||
test.compare(t8, t6_cpu + t7_cpu, "add_views_2")
|
|
@ -1,43 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
x = torch.rand((3,64,8,8), device=dev)
|
||||
y = x*x
|
||||
print (y.stride())
|
||||
|
||||
dim = [64,24,24]
|
||||
dim = [4,4,4]
|
||||
N = 2
|
||||
count = dim[0]*dim[1]*dim[2]
|
||||
sizes = (N,dim[0],dim[1],dim[2])
|
||||
strides = (1,dim[1]*dim[2],dim[2],1)
|
||||
print(count)
|
||||
t0 = torch.randn((N,count), device=dev)
|
||||
t0_like = torch.randn((N,count))
|
||||
|
||||
|
||||
t1 = t0.as_strided(sizes, strides)
|
||||
t1_ref = t0.to('cpu').as_strided(sizes, strides)
|
||||
t1_like = t0_like.as_strided(sizes, strides)
|
||||
|
||||
t1_ref = t1_ref.clone()
|
||||
|
||||
# check that the IR has recorded the
|
||||
# stride properly before invoking JIT
|
||||
# CHECK: PASS! stride check
|
||||
test.compare_eq(t1.stride(), t1_like.stride(), "stride")
|
||||
|
||||
# CHECK: PASS! as_stride check
|
||||
test.compare(t1_ref, t1, "as_stride")
|
||||
|
||||
# CHECK: PASS! as_stride stride check
|
||||
test.compare_eq(t1_ref.stride(), t1.to("cpu").stride(), "as_stride stride")
|
|
@ -1,17 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
model = torch.nn.Conv2d(2,16,7,stride=[2,2], padding=[3,3],
|
||||
dilation=1, groups=1, bias=True)
|
||||
|
||||
tensor = torch.randn((1,2,128,128))
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
test.check_ref(model, tensor)
|
|
@ -1,46 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
N = 3
|
||||
Cin = 16
|
||||
Cout = 4
|
||||
w = 10
|
||||
h = 10
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(Cin, Cout, (3,3))
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
output = F.log_softmax(x, dim=1)
|
||||
return output
|
||||
|
||||
model = Net()
|
||||
tensor = torch.randn(N, Cin, h, w)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_ref(model, tensor)
|
||||
|
||||
loss = torch.nn.NLLLoss()
|
||||
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, Cout)
|
||||
|
||||
# CHECK: PASS! back check
|
||||
test.check_back(fwd_path, target, loss)
|
||||
|
||||
# CHECK: PASS! weight_grad check
|
||||
test.compare(model.conv1.weight.grad, fwd_path[0].conv1.weight.grad, "weight_grad")
|
||||
# CHECK: PASS! bias_grad check
|
||||
test.compare(model.conv1.bias.grad, fwd_path[0].conv1.bias.grad, "bias_grad")
|
|
@ -1,60 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 32, 3, 1, bias=True)
|
||||
self.conv2 = nn.Conv2d(32, 64, 3, 1, bias=True)
|
||||
#self.maxpool2d = nn.MaxPool2d(2,2)
|
||||
self.fc1 = nn.Linear(9216*4, 128, bias=True)
|
||||
self.fc2 = nn.Linear(128, 10, bias=True)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = F.relu(x)
|
||||
x = self.conv2(x)
|
||||
#x = self.maxpool2d(x)
|
||||
x = x.view((64,9216*4))
|
||||
x = self.fc1(x)
|
||||
x = F.relu(x)
|
||||
x = self.fc2(x)
|
||||
output = F.log_softmax(x, dim=1)
|
||||
return output
|
||||
|
||||
def main():
|
||||
model = Net()
|
||||
tensor = torch.randn((64, 1, 28, 28), requires_grad=True)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
||||
|
||||
target = torch.ones((64), dtype=torch.long)
|
||||
loss = F.nll_loss
|
||||
|
||||
# CHECK: PASS! back check
|
||||
test.check_back(fwd_path, target, loss)
|
||||
|
||||
# CHECK: PASS! weight_grad check
|
||||
test.compare(model.conv2.weight.grad,
|
||||
fwd_path[0].conv2.weight.grad, "weight_grad")
|
||||
# CHECK: PASS! bias_grad check
|
||||
test.compare(model.conv2.bias.grad,
|
||||
fwd_path[0].conv2.bias.grad, "bias_grad")
|
||||
# CHECK: PASS! fc1_weight_grad check
|
||||
test.compare(model.fc1.weight.grad,
|
||||
fwd_path[0].fc1.weight.grad, "fc1_weight_grad")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,53 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
from torch.optim.lr_scheduler import StepLR
|
||||
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 32, 3, 1)
|
||||
self.conv2 = nn.Conv2d(32, 64, 3, 1)
|
||||
self.maxpool2d = nn.MaxPool2d(2,2)
|
||||
#self.dropout1 = nn.Dropout2d(0.25)
|
||||
#self.dropout2 = nn.Dropout2d(0.5)
|
||||
self.fc1 = nn.Linear(9216, 128)
|
||||
self.fc2 = nn.Linear(128, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = F.relu(x)
|
||||
x = self.conv2(x)
|
||||
x = self.maxpool2d(x)
|
||||
#x = self.dropout1(x)
|
||||
x = x.view((4,9216))
|
||||
x = self.fc1(x)
|
||||
x = F.relu(x)
|
||||
#x = self.dropout2(x)
|
||||
x = self.fc2(x)
|
||||
output = F.log_softmax(x, dim=1)
|
||||
return output
|
||||
|
||||
|
||||
def main():
|
||||
model = Net()
|
||||
tensor = torch.randn((4, 1, 28, 28))
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,17 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
model = torch.nn.Linear(1024,16).to(dev)
|
||||
tensor = torch.randn(4,1024).to(dev)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
|
@ -1,15 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
model = torch.nn.LogSoftmax(dim=0)
|
||||
tensor = torch.ones(1,2,3,4)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
|
@ -1,18 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
model = torch.nn.MaxPool2d(kernel_size=(3,3), stride=(2,2), padding=(1,1),
|
||||
dilation=1, return_indices=False, ceil_mode=False)
|
||||
|
||||
tensor = torch.randn(1,32,16,16)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
from torch.optim.lr_scheduler import StepLR
|
||||
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.fc1 = nn.Linear(28*28, 50)
|
||||
self.fc2 = nn.Linear(50, 50)
|
||||
self.fc3 = nn.Linear(50, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = x.view(-1, 28*28)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.relu(self.fc2(x))
|
||||
return F.log_softmax(self.fc3(x), dim=1)
|
||||
|
||||
def main():
|
||||
device = torch_mlir.mlir_device()
|
||||
model = Net()
|
||||
tensor = torch.randn((64, 1, 28, 28),requires_grad=True)
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_ref(model, tensor)
|
||||
|
||||
target = torch.ones((64), dtype=torch.long)
|
||||
loss = F.nll_loss
|
||||
|
||||
# CHECK: PASS! back check
|
||||
test.check_back(fwd_path, target, loss)
|
||||
|
||||
# CHECK: PASS! fc1_weight_grad check
|
||||
test.compare(model.fc1.weight.grad, fwd_path[0].fc1.weight.grad, "fc1_weight_grad")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,32 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
t0 = torch.randn((3,13), device=dev)
|
||||
t1 = torch.randn((13,5), device=dev)
|
||||
print(t0.to('cpu'), t1.to('cpu'))
|
||||
print(torch.mm(t0.to('cpu'), t1.to('cpu')))
|
||||
|
||||
t2 = torch.mm(t0, t1)
|
||||
|
||||
#
|
||||
# Check the result tensor against the CPU
|
||||
#
|
||||
t0_cpu = t0.to('cpu')
|
||||
t1_cpu = t1.to('cpu')
|
||||
t2_cpu = t2.to('cpu')
|
||||
|
||||
print (t0_cpu, " *\n", t1_cpu, " =\n", t2_cpu)
|
||||
|
||||
ref_tensor = torch.mm(t0_cpu, t1_cpu)
|
||||
# CHECK: PASS! mm check
|
||||
test.compare(t2, ref_tensor, "mm")
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
t0 = torch.randn((4,4), device=dev)
|
||||
t1 = torch.randn((4,4), device=dev)
|
||||
|
||||
t2 = t0 * t1
|
||||
#
|
||||
# Check the result tensor against the CPU
|
||||
#
|
||||
t0_cpu = t0.to('cpu')
|
||||
t1_cpu = t1.to('cpu')
|
||||
t2_cpu = t2.to('cpu')
|
||||
|
||||
print (t0_cpu, " *\n", t1_cpu, " =\n", t2_cpu)
|
||||
|
||||
# CHECK: PASS! mul2 check
|
||||
test.compare(t2, t0_cpu * t1_cpu, "mul2")
|
|
@ -1,21 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
model = torch.nn.LogSoftmax(dim=1)
|
||||
tensor = torch.randn(3,5,requires_grad=True)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_fwd(model, tensor)
|
||||
|
||||
target = torch.tensor([1, 0, 4])
|
||||
loss = torch.nn.NLLLoss()
|
||||
|
||||
# CHECK: PASS! back check
|
||||
test.check_back(fwd_path, target, loss)
|
|
@ -1,15 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
model = torch.nn.ReLU()
|
||||
tensor = torch.randn(10)
|
||||
|
||||
# CHECK: PASS! fwd check
|
||||
fwd_path = test.check_ref(model, tensor)
|
|
@ -1,18 +0,0 @@
|
|||
# -*- Python -*-
|
||||
# This file is licensed under a pytorch-style license
|
||||
# See frontends/pytorch/LICENSE for license information.
|
||||
|
||||
import torch
|
||||
import npcomp.frontends.pytorch as torch_mlir
|
||||
import npcomp.frontends.pytorch.test as test
|
||||
|
||||
# RUN: %PYTHON %s | FileCheck %s
|
||||
|
||||
dev = torch_mlir.mlir_device()
|
||||
|
||||
tensor = torch.randn(2,3).to(dev)
|
||||
result = tensor.t()
|
||||
|
||||
ref_result = tensor.to('cpu').t()
|
||||
# CHECK: PASS! transpose check
|
||||
test.compare(ref_result, result, "transpose")
|
|
@@ -1,31 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.

import torch
import npcomp.frontends.pytorch as torch_mlir

# RUN: %PYTHON %s | FileCheck %s

dev = torch_mlir.mlir_device()

model = torch.nn.Conv2d(2,16,7,stride=[2,2], padding=[3,3], dilation=1, groups=1, bias=True).to(dev)

tensor = torch.randn((1,2,128,128), device=dev)
result = model(tensor)

mlir = torch_mlir.get_mlir(result)
report = torch_mlir.op_report(mlir)

# CHECK-LABEL: "L0-convolution_overrideable-0"
# CHECK-NEXT: "activation_in": 32768
# CHECK-NEXT: "activation_out": 65536
# CHECK-NEXT: "ops:+": 65536
# CHECK-NEXT: "ops:MAC": 6422528
# CHECK-NEXT: "parameters_in": 1584
# CHECK-NEXT: "reads": 34352
# CHECK-NEXT: "writes": 65536
for k, v in report.items():
    print("\"{}\"".format(k))
    for k, v in v.items():
        print("\"{}\": {}".format(k, v))
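The CHECK values in this deleted conv2d report test follow directly from the layer configuration (2→16 channels, 7×7 kernel, stride 2, padding 3 on a 1×2×128×128 input). A quick back-of-the-envelope reproduction, added here for reference only:

```python
# Reproduce the expected op_report numbers by hand
# (plain Python, not part of the deleted test file).
in_c, out_c, k, stride, pad = 2, 16, 7, 2, 3
h = 128
out_h = (h + 2 * pad - k) // stride + 1           # 64

activation_in  = in_c * h * h                     # 32768
activation_out = out_c * out_h * out_h            # 65536
parameters_in  = out_c * in_c * k * k + out_c     # 1584 (weights + bias)
ops_add        = activation_out                   # 65536, presumably one bias add per output element
macs           = activation_out * in_c * k * k    # 6422528
reads          = activation_in + parameters_in    # 34352
writes         = activation_out                   # 65536

print(activation_in, activation_out, parameters_in, ops_add, macs, reads, writes)
```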
@@ -1,107 +0,0 @@
# -*- Python -*-
# This file is licensed under a pytorch-style license
# See frontends/pytorch/LICENSE for license information.

import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

import npcomp.frontends.pytorch as torch_mlir
import json

# RUN: %PYTHON %s | FileCheck %s

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=0)
        self.maxpool1 = nn.MaxPool2d(2,2)
        self.maxpool2 = nn.MaxPool2d(2,2)
        self.fc1 = nn.Linear(576, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 8)

    def forward(self, x):
        x = self.conv1(x)
        print(x.shape)
        x = F.relu(x)
        print(x.shape)
        x = self.maxpool1(x)
        print(x.shape)

        x = self.conv2(x)
        print(x.shape)
        x = F.relu(x)
        print(x.shape)
        x = self.maxpool2(x)
        print(x.shape)
        x = x.view(8, 6*6*16)

        x = self.fc1(x)
        x = F.relu(x)

        x = self.fc2(x)
        x = F.relu(x)

        x = self.fc3(x)
        output = F.log_softmax(x, dim=1)

        return output

def main():

    test_status = "PASS!"

    # CHECK-LABEL: test_op_report_vgg_style_lenet
    # CHECK: PASS!
    print("test_op_report_vgg_style_lenet")

    device = torch_mlir.mlir_device()

    model = Net().to(device)
    ref_tensor = torch.randn((8, 1, 30, 30))
    tensor = ref_tensor.clone().to(device)

    result = model(tensor)
    target = torch.ones((8), dtype=torch.long).to(device)
    loss = F.nll_loss(result, target)
    loss.backward()

    mlir0 = torch_mlir.get_mlir(model.conv1.weight.grad)
    print(mlir0)
    report = torch_mlir.op_report(mlir0)
    print(report)

    report_dict = report
    expected = 32
    if (len(report_dict) != expected):
        print("### ERROR: Expecting", expected, "items in the report, but got ", len(report_dict))
        test_status = "FAIL!"

    # Every item should have a read and a write
    for key, value in report_dict.items():
        if not 'reads' in value:
            print(f"### ERROR: {key} does not contain the required reads field")
            test_status = "FAIL!"
        if not 'writes' in value:
            print(f"### ERROR: {key} does not contain the required writes field")
            test_status = "FAIL!"
        if "convolution" in key:
            if not 'ops:MAC' in value:
                print(f"### ERROR: convolution {key} does not contain the required MAC field")
                test_status = "FAIL!"
        if "mm" in key:
            if not 'ops:MAC' in value:
                print(f"### ERROR: mm {key} does not contain the required MAC field")
                test_status = "FAIL!"

    print(test_status)

if __name__ == '__main__':
    main()
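The `view(8, 6*6*16)` and `nn.Linear(576, 128)` sizes in this deleted LeNet-style test are consistent with its 30×30 input. A short shape walk-through, added for reference only and using the standard Conv2d/MaxPool2d output-size formulas:

```python
# Shape walk-through for the Net above (reference only, not part of the deleted file).
def conv_out(n, k, pad=0, stride=1):
    return (n + 2 * pad - k) // stride + 1

h = 30
h = conv_out(h, 3, pad=1)   # conv1:    30 -> 30
h = h // 2                  # maxpool1: 30 -> 15
h = conv_out(h, 3, pad=0)   # conv2:    15 -> 13
h = h // 2                  # maxpool2: 13 -> 6
print(16 * h * h)           # 576, matching view(8, 6*6*16) and nn.Linear(576, 128)
```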
@@ -1,2 +0,0 @@
if not config.enable_c10_dispatch:
    config.unsupported = True
@@ -34,7 +34,6 @@ config.llvm_host_triple = '@LLVM_HOST_TRIPLE@'
config.host_arch = "@HOST_ARCH@"
config.npcomp_src_root = "@CMAKE_SOURCE_DIR@"
config.npcomp_obj_root = "@CMAKE_BINARY_DIR@"
config.enable_c10_dispatch = not @NPCOMP_ENABLE_TORCH_TYPE_DISPATCH@

# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
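The two deleted lit fragments worked together: the generated `lit.site.cfg` derived `enable_c10_dispatch` from the CMake option, and the directory-local config marked these tests unsupported when it was off. A schematic sketch; the concrete value substituted for the `@...@` placeholder is an assumption:

```python
# Schematic of the deleted lit gating (values are illustrative, not the real substitution).
NPCOMP_ENABLE_TORCH_TYPE_DISPATCH = True      # -DNPCOMP_ENABLE_TORCH_TYPE_DISPATCH=ON
enable_c10_dispatch = not NPCOMP_ENABLE_TORCH_TYPE_DISPATCH

# The directory-local lit config then skipped these tests whenever the
# old type-dispatch build was active:
unsupported = not enable_c10_dispatch
```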