torch-mlir/frontends/pytorch/lib/aten_ops.cpp

773 lines
27 KiB
C++

//===- aten_ops.cpp ---------------------------------------------*- C++ -*-===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
// This file implements C libraries that are targetted by MLIR code generation
// from the ATen dialect. This library is intended to support a functional
// proof of concept rather than optimized for high performance. Most of the
// functions are implemented by calling back into the torch libraries.
#include <assert.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <ATen/ATen.h>
#include <torch/torch.h>
#include "nnpack.h"
#include <ATen/CPUType.h>
namespace {
template <typename T, int N> struct tensor_t {
T *d;
T *aligned;
size_t offset;
size_t shape[N];
size_t stride[N];
size_t index(size_t n, size_t channel, size_t row, size_t col) const {
size_t channels = shape[1];
size_t height = shape[2];
size_t width = shape[3];
return n * height * width * channels + channel * height * width +
row * width + col;
}
tensor_t() {
d = aligned = nullptr;
offset = 0;
for (int i = 0; i < N; i++)
shape[i] = stride[i] = 0;
}
};
template <typename T, int N>
std::vector<int64_t> translate_shape(tensor_t<T, N> *t) {
std::vector<int64_t> shape;
for (int i = 0; i < N; i++) {
shape.push_back(t->shape[i]);
// std::cout << i << " shape " << t->shape[i] << std::endl;
}
return shape;
}
template <typename T, int N>
std::vector<int64_t> translate_stride(tensor_t<T, N> *t) {
std::vector<int64_t> stride;
for (int i = 0; i < N; i++) {
stride.push_back(t->stride[i]);
// std::cout << i << " stride " << t->stride[i] << std::endl;
}
return stride;
}
template <int N> void dumpTensor(std::ostream &o, tensor_t<float, N> *t) {
o << "Shape:";
for (int i = 0; i < N; i++)
o << t->shape[i] << " ";
o << "Stride:";
for (int i = 0; i < N; i++)
o << t->stride[i] << " ";
o << "\n";
}
template <typename T, int N>
at::Tensor to_torch(tensor_t<T, N> *t,
const at::TensorOptions &options = at::TensorOptions()) {
// std::cout << "to_torch\n";
return torch::from_blob((void *)t->d, translate_shape(t), translate_stride(t),
options);
}
template <typename T>
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r);
template <typename T, int N>
void add_out(tensor_t<T, N> *a, tensor_t<T, N> *b, T alpha, tensor_t<T, N> *r) {
at::Tensor torch_a = to_torch(a);
at::Tensor torch_b = to_torch(b);
at::Tensor result = at::native::add(torch_a, torch_b, alpha).clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void addmm_out(tensor_t<T, 1> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *c,
int32_t alpha, int32_t beta, tensor_t<T, 2> *r) {
at::Tensor torch_a = to_torch(a);
at::Tensor torch_b = to_torch(b);
at::Tensor torch_c = to_torch(c);
at::Tensor result =
at::native::addmm(torch_a, torch_b, torch_c, alpha, beta).clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N, int M>
void as_strided_out(tensor_t<float, M> *a,
/*size*/ int32_t sz0, int32_t sz1, int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1, int32_t sd2,
int32_t sd3, int32_t offset, tensor_t<T, N> *r) {
at::Tensor input = to_torch(a);
std::vector<int64_t> size;
std::vector<int64_t> stride;
c10::optional<int64_t> storage_offset;
if (offset != 0)
storage_offset = offset;
if (N > 0) {
size.push_back(sz0);
stride.push_back(sd0);
}
if (N > 1) {
size.push_back(sz1);
stride.push_back(sd1);
}
if (N > 2) {
size.push_back(sz2);
stride.push_back(sd2);
}
if (N > 3) {
size.push_back(sz3);
stride.push_back(sd3);
}
std::vector<int64_t> sizeRef{size};
std::vector<int64_t> strideRef{stride};
// for (int i = 0; i<N; i++)
// std::cout << "STRIDE " << i << " " << stride[i] << std::endl;
at::Tensor result =
at::native::as_strided_tensorimpl(input, size, stride, storage_offset)
.clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
// FIXME: stride, padding, dilaection, output_padding should be IntArrayRef
template <typename T>
void conv2d_out(tensor_t<T, 4> *t, tensor_t<T, 4> *weight, tensor_t<T, 1> *bias,
int32_t stride, int32_t pad, int32_t dilation,
tensor_t<T, 4> *r) {
at::Tensor torch_t = to_torch(t);
at::Tensor torch_w = to_torch(weight);
at::Tensor torch_b = to_torch(bias);
int64_t groups = 1;
at::Tensor result = at::native::conv2d(torch_t, torch_w, torch_b, stride, pad,
dilation, groups)
.clone();
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void conv2d_backward_out(tensor_t<T, 4> *grad_output, tensor_t<T, 4> *input,
tensor_t<T, 4> *weight, int32_t stride, int32_t pad,
int32_t dilation, tensor_t<T, 4> *r0,
tensor_t<T, 4> *r1, tensor_t<T, 1> *r2) {
const at::Tensor &arg_grad = to_torch(grad_output);
const at::Tensor &arg_input = to_torch(input);
const at::Tensor &arg_weight = to_torch(weight);
std::vector<int64_t> p{pad, pad};
std::vector<int64_t> s{stride, stride};
std::vector<int64_t> d{dilation, dilation};
std::array<bool, 3> output_mask{true, true, true};
std::tuple<at::Tensor, at::Tensor, at::Tensor> grads =
at::native::mkldnn_convolution_backward(arg_input, arg_grad, arg_weight,
p, s, d, 1, output_mask);
auto result0 = std::get<0>(grads);
auto result1 = std::get<1>(grads);
auto result2 = std::get<2>(grads);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
memcpy(r2->d, result2.data_ptr(), result2.numel() * sizeof(T));
}
template <typename T, int N>
void log_softmax_out(tensor_t<T, N> *t, int32_t dim, bool half_to_float,
tensor_t<T, N> *r) {
at::Tensor input = to_torch(t);
at::Tensor result = at::native::log_softmax_cpu(input, dim, half_to_float);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void log_softmax_backward_data_out(tensor_t<T, N> *a, tensor_t<T, N> *b,
int32_t c, tensor_t<T, N> *d,
tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor inputD = to_torch(d);
at::Tensor result =
at::native::log_softmax_backward_cpu(inputA, inputB, c, inputD);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void max_pool2d_with_indices_out(tensor_t<T, 4> *t, int32_t c, int32_t d,
int32_t e, int32_t f, bool ceil_mode,
tensor_t<T, 4> *r0, tensor_t<int64_t, 4> *r1) {
at::Tensor input = to_torch(t);
std::vector<int64_t> kernel{c, c};
std::vector<int64_t> stride{d, d};
std::vector<int64_t> padding{e, e};
std::vector<int64_t> dilation{f, f};
auto result = at::native::max_pool2d_with_indices_cpu(
input, kernel, stride, padding, dilation, ceil_mode);
at::Tensor outTensor = std::get<0>(result);
at::Tensor idxTensor = std::get<1>(result);
memcpy(r0->d, outTensor.data_ptr(), outTensor.numel() * sizeof(T));
memcpy(r1->d, idxTensor.data_ptr(), idxTensor.numel() * sizeof(T));
}
template <typename T>
void max_pool2d_with_indices_backward_out(tensor_t<T, 4> *a, tensor_t<T, 4> *b,
int32_t c, int32_t d, int32_t e,
int32_t f, bool g,
tensor_t<int64_t, 4> *h,
tensor_t<T, 4> *r) {
const at::Tensor &inputA = to_torch(a);
const at::Tensor &inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
const at::Tensor &inputH = to_torch(h, options);
std::vector<int64_t> kernel{c, c};
std::vector<int64_t> stride{d, d};
std::vector<int64_t> padding{e, e};
std::vector<int64_t> dilation{f, f};
at::Tensor result = at::native::max_pool2d_with_indices_backward_cpu(
inputA, inputB, kernel, stride, padding, dilation, g, inputH);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T>
void mm_out(tensor_t<T, 2> *a, tensor_t<T, 2> *b, tensor_t<T, 2> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = inputA.matmul(inputB);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void mul_out(tensor_t<T, N> *a, tensor_t<T, N> *b, tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = at::native::mul(inputA, inputB);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N>
void relu_out(tensor_t<T, N> *a, tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor result = at::native::relu(inputA);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T> void t_out(tensor_t<T, 2> *a, tensor_t<T, 2> *r) {
size_t h = a->shape[0];
size_t w = a->shape[1];
for (size_t i = 0; i < h; i++)
for (size_t j = 0; j < w; j++)
r->d[j * h + i] = a->d[i * w + j];
}
template <typename T, int N>
void threshold_backward_out(tensor_t<T, N> *a, tensor_t<T, N> *b, int32_t c,
tensor_t<T, N> *r) {
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::Tensor result = at::native::threshold_backward(inputA, inputB, c);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
template <typename T, int N, int M>
void view_out(tensor_t<T, M> *a, int32_t b, int32_t c, int32_t d, int32_t e,
tensor_t<T, N> *r) {
tensor_t<T, N> result;
size_t numel = 1;
for (size_t d = 0; d < M; d++)
numel *= a->shape[d];
if (N == 1)
c = d = e = 1;
if (N == 2)
d = e = 1;
if (N == 3)
e = 1;
int inferred = 0;
if (b == -1)
inferred++;
if (c == -1)
inferred++;
if (d == -1)
inferred++;
if (e == -1)
inferred++;
assert(inferred <= 1 &&
"aten.view Error: only one dimension can be inferred");
if (b == -1)
b = numel / (c * d * e);
if (c == -1)
c = numel / (b * d * e);
if (d == -1)
d = numel / (b * c * e);
if (e == -1)
e = numel / (b * c * d);
if (N > 0)
r->shape[0] = b;
if (N > 1)
r->shape[1] = c;
if (N > 2)
r->shape[2] = d;
if (N > 3)
r->shape[3] = e;
memcpy(r->d, a->d, numel * sizeof(T));
}
} // namespace
extern "C" {
// add_out
void _mlir_ciface_add_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b, int32_t i,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 1>(a, b, i, r);
}
void _mlir_ciface_add_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b, int32_t i,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 2>(a, b, i, r);
}
void _mlir_ciface_add_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b, int32_t i,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 3>(a, b, i, r);
}
void _mlir_ciface_add_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b, int32_t i,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
add_out<float, 4>(a, b, i, r);
}
// addmm_out
void _mlir_ciface_addmm_2F32_1F32_2F32_2F32_out(tensor_t<float, 1> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *c,
int32_t alpha, int32_t beta,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
addmm_out<float>(a, b, c, alpha, beta, r);
}
// as_strided_out
void _mlir_ciface_as_strided_1F32_1F32_out(tensor_t<float, 1> *a,
/*size*/ int32_t sz0, int32_t sz1,
int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1,
int32_t sd2, int32_t sd3,
int32_t offset,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
as_strided_out<float, 1, 1>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
r);
}
void _mlir_ciface_as_strided_4F32_2F32_out(tensor_t<float, 2> *a,
/*size*/ int32_t sz0, int32_t sz1,
int32_t sz2, int32_t sz3,
/*stride*/ int32_t sd0, int32_t sd1,
int32_t sd2, int32_t sd3,
int32_t offset,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
// std::cout << sz0 << " "
// << sz1 << " "
// << sz2 << " "
// << sz3 << "\n";
// std::cout << sd0 << " "
// << sd1 << " "
// << sd2 << " "
// << sd3 << "\n";
as_strided_out<float, 4, 2>(a, sz0, sz1, sz2, sz3, sd0, sd1, sd2, sd3, offset,
r);
}
// conv2d_out
void _mlir_ciface_conv2d_4F32_4F32_4F32_1F32_out(
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
}
void _mlir_ciface_conv2d_relu_4F32_4F32_4F32_1F32_out(
tensor_t<float, 4> *t, tensor_t<float, 4> *weight, tensor_t<float, 1> *bias,
int32_t stride, int32_t padding, int32_t dilation, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_out<float>(t, weight, bias, stride, padding, dilation, r);
relu_out<float, 4>(r, r);
}
// conv2d_backward_out
void _mlir_ciface_conv2d_backward_4F32_4F32_1F32_4F32_4F32_4F32_out(
tensor_t<float, 4> *grad_output, tensor_t<float, 4> *t,
tensor_t<float, 4> *weight, int32_t stride, int32_t padding,
int32_t dilation, tensor_t<float, 4> *r0, tensor_t<float, 4> *r1,
tensor_t<float, 1> *r2) {
// std::cout << "aten_ops " << __func__ << "\n";
conv2d_backward_out<float>(grad_output, t, weight, stride, padding, dilation,
r0, r1, r2);
}
// div
float *div_0F32_0F32_0F32(float *a, float *b) {
// std::cout << "aten_ops " << __func__ << "\n";
float *ret = (float *)malloc(sizeof(float));
*ret = *a / *b;
return ret;
}
// log_softmax_out
void _mlir_ciface_log_softmax_1F32_1F32_out(tensor_t<float, 1> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 1>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_2F32_2F32_out(tensor_t<float, 2> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 2>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_3F32_3F32_out(tensor_t<float, 3> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 3>(t, dim, half_to_float, r);
}
void _mlir_ciface_log_softmax_4F32_4F32_out(tensor_t<float, 4> *t, int32_t dim,
bool half_to_float,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_out<float, 4>(t, dim, half_to_float, r);
}
// log_softmax_backward_data_out
void _mlir_ciface_log_softmax_backward_data_2F32_2F32_2F32_2F32_out(
tensor_t<float, 2> *a, tensor_t<float, 2> *b, int32_t c,
tensor_t<float, 2> *d, tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_backward_data_out<float, 2>(a, b, c, d, r);
}
void _mlir_ciface_log_softmax_backward_data_4F32_4F32_4F32_4F32_out(
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c,
tensor_t<float, 4> *d, tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
log_softmax_backward_data_out<float, 4>(a, b, c, d, r);
}
// max_pool2d_out
void _mlir_ciface_max_pool2d_with_indices_4F32_4I64_4F32_out(
tensor_t<float, 4> *t, int32_t kernel, int32_t pad, int32_t stride,
int32_t dilation, bool ceil_mode, tensor_t<float, 4> *r0,
tensor_t<int64_t, 4> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
max_pool2d_with_indices_out<float>(t, kernel, pad, stride, dilation,
ceil_mode, r0, r1);
}
// max_pool2d backward_out
void _mlir_ciface_max_pool2d_with_indices_backward_4F32_4F32_4F32_4I64_out(
tensor_t<float, 4> *a, tensor_t<float, 4> *b, int32_t c, int32_t d,
int32_t e, int32_t f, bool g, tensor_t<int64_t, 4> *h,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
max_pool2d_with_indices_backward_out<float>(a, b, c, d, e, f, g, h, r);
}
// mm_out
void _mlir_ciface_mm_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mm_out<float>(a, b, r);
}
// mul_out
void _mlir_ciface_mul_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 1>(a, b, r);
}
void _mlir_ciface_mul_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 2>(a, b, r);
}
void _mlir_ciface_mul_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 3>(a, b, r);
}
void _mlir_ciface_mul_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
mul_out<float, 4>(a, b, r);
}
// nll_loss2d_forward_out
void _mlir_ciface_nll_loss2d_forward_1F32_1F32_4F32_3I64_1F32_out(
tensor_t<float, 4> *a, tensor_t<uint64_t, 3> *b, tensor_t<float, 1> *c,
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputB = to_torch(b, options);
at::Tensor inputC = to_torch(c);
std::tuple<at::Tensor, at::Tensor> result =
at::CPUType::nll_loss2d_forward(inputA, inputB, inputC, d, e);
at::Tensor result0 = std::get<0>(result);
at::Tensor result1 = std::get<1>(result);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
}
// nll_loss2d_backward_out
void _mlir_ciface_nll_loss2d_backward_4F32_1F32_4F32_3I64_1F32_1F32_out(
tensor_t<float, 1> *a, tensor_t<float, 4> *b, tensor_t<uint64_t, 3> *c,
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputC = to_torch(c, options);
at::Tensor inputD = to_torch(d);
at::Tensor inputG = to_torch(g);
at::Tensor result = at::CPUType::nll_loss2d_backward(inputA, inputB, inputC,
inputD, e, f, inputG);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
void _mlir_ciface_nll_loss_backward_2F32_1F32_2F32_1I64_1F32_1F32_out(
tensor_t<float, 1> *a, tensor_t<float, 2> *b, tensor_t<uint64_t, 1> *c,
tensor_t<float, 1> *d, int32_t e, int32_t f, tensor_t<float, 1> *g,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::Tensor inputB = to_torch(b);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputC = to_torch(c, options);
at::Tensor inputD = to_torch(d);
at::Tensor inputG = to_torch(g);
at::Tensor result = at::CPUType::nll_loss_backward(inputA, inputB, inputC,
inputD, e, f, inputG);
memcpy(r->d, result.data_ptr(), result.numel() * sizeof(T));
}
// nll_loss_forward_out
void _mlir_ciface_nll_loss_forward_1F32_1F32_2F32_1I64_1F32_out(
tensor_t<float, 2> *a, tensor_t<uint64_t, 1> *b, tensor_t<float, 1> *c,
int64_t d, int64_t e, tensor_t<float, 1> *r0, tensor_t<float, 1> *r1) {
// std::cout << "aten_ops " << __func__ << "\n";
using T = float;
at::Tensor inputA = to_torch(a);
at::TensorOptions options(at::ScalarType::Long);
at::Tensor inputB = to_torch(b, options);
at::Tensor inputC = to_torch(c);
std::tuple<at::Tensor, at::Tensor> result =
at::CPUType::nll_loss_forward(inputA, inputB, inputC, d, e);
at::Tensor result0 = std::get<0>(result);
at::Tensor result1 = std::get<1>(result);
memcpy(r0->d, result0.data_ptr(), result0.numel() * sizeof(T));
memcpy(r1->d, result1.data_ptr(), result1.numel() * sizeof(T));
}
// relu_out
void _mlir_ciface_relu_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 1>(a, r);
}
void _mlir_ciface_relu_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 2>(a, r);
}
void _mlir_ciface_relu_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 3>(a, r);
}
void _mlir_ciface_relu_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
relu_out<float, 4>(a, r);
}
// t_out
void _mlir_ciface_t_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
t_out<float>(a, r);
}
// threshold_backward_out
void _mlir_ciface_threshold_backward_1F32_1F32_1F32_out(tensor_t<float, 1> *a,
tensor_t<float, 1> *b,
int32_t c,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 1>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_2F32_2F32_2F32_out(tensor_t<float, 2> *a,
tensor_t<float, 2> *b,
int32_t c,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 2>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_3F32_3F32_3F32_out(tensor_t<float, 3> *a,
tensor_t<float, 3> *b,
int32_t c,
tensor_t<float, 3> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 3>(a, b, c, r);
}
void _mlir_ciface_threshold_backward_4F32_4F32_4F32_out(tensor_t<float, 4> *a,
tensor_t<float, 4> *b,
int32_t c,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
threshold_backward_out<float, 4>(a, b, c, r);
}
// view_out
void _mlir_ciface_view_1F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 4>(a, b, c, d, e, r);
}
void _mlir_ciface_view_1F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 3>(a, b, c, d, e, r);
}
void _mlir_ciface_view_1F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 1> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 1, 2>(a, b, c, d, e, r);
}
void _mlir_ciface_view_2F32_4F32_out(tensor_t<float, 4> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 2> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 2, 4>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_1F32_out(tensor_t<float, 1> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 1>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_2F32_out(tensor_t<float, 2> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 2>(a, b, c, d, e, r);
}
void _mlir_ciface_view_4F32_3F32_out(tensor_t<float, 3> *a, int32_t b,
int32_t c, int32_t d, int32_t e,
tensor_t<float, 4> *r) {
// std::cout << "aten_ops " << __func__ << "\n";
view_out<float, 4, 3>(a, b, c, d, e, r);
}
}