torch-mlir/frontends/pytorch/csrc/c10_dispatch/acap_dispatch.cpp

//===- acap_dispatch.cpp --------------------------------------------------===//
//
// This file is licensed under a pytorch-style license
// See frontends/pytorch/LICENSE for license information.
//
//===----------------------------------------------------------------------===//
#include "acap_dispatch.h"
#include "mlir-c/StandardAttributes.h"
#include "npcomp/Python/PybindUtils.h"
#include <ATen/core/function_schema.h>
#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>
#include <c10/core/DispatchKey.h>
#include <torch/library.h>

using namespace torch_mlir;
namespace py = pybind11;
using c10::FunctionSchema;
using c10::OperatorHandle;
using c10::Stack;

// TODO: Private-use dispatch keys are not intended for real use. Allocate a
// proper dispatch key in upstream PyTorch (DispatchKey.h) prior to maturity.
// Note that the TORCH_LIBRARY_* macros expand this key by name while other
// APIs use its enum value, so both forms are defined here. Both can be
// dropped once we have our own key.
#define ACAP_DISPATCH_KEY PrivateUse1
static c10::DispatchKey kAcapDispatchKey = c10::DispatchKey::ACAP_DISPATCH_KEY;
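
// Returns the thread-local stack of active capture contexts. The most
// recently entered controller sits at the front.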
std::list<AcapController::Activation> &
AcapController::getThreadLocalActiveStack() {
static thread_local std::list<Activation> threadLocalActiveStack;
return threadLocalActiveStack;
}
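
// Python context-manager __enter__: pushes this controller onto the
// thread-local activation stack and enables the capture dispatch key for the
// calling thread. The intended Python-side usage is roughly the sketch below
// (names are illustrative, not the exact binding API):
//
//   with controller:                 # __enter__ starts capturing
//     result = torch.mm(lhs, rhs)    # dispatched kernels are recorded
//     controller.returns([result])   # fixes the traced function's results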
py::object AcapController::contextEnter() {
auto &stack = getThreadLocalActiveStack();
stack.emplace_front(shared_from_this());
Activation &current = stack.front();
current.dispatchGuard =
std::make_unique<c10::impl::IncludeDispatchKeyGuard>(kAcapDispatchKey);
return py::cast(this);
}
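
// Python context-manager __exit__: verifies balanced nesting, pops this
// controller, and emits an empty return if returns() was never called so
// that the captured function is still well-formed.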
void AcapController::contextExit(py::object exc_type, py::object exc_val,
py::object exc_tb) {
auto &stack = getThreadLocalActiveStack();
if (stack.empty() || stack.front().controller.get() != this) {
throw py::raisePyError(PyExc_RuntimeError,
"Mismatched context manager __exit__");
}
stack.pop_front();
if (!hasReturned) {
returns({});
}
}
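
// Emits a std.return of the MLIR values captured for `tensors` and rewrites
// the function's return types to match. Every tensor must originate from
// within the capture context.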
void AcapController::returns(std::vector<at::Tensor> tensors) {
verifyHasNotReturned();
llvm::SmallVector<MlirType, 4> returnsTypes;
llvm::SmallVector<MlirValue, 4> returnsValues;
for (auto &tensor : tensors) {
MlirValue v = funcBuilder->lookupTensor(tensor);
if (mlirValueIsNull(v)) {
      // Exclude recursive dispatch so that printing the tensor below does
      // not re-enter this kernel.
c10::impl::ExcludeDispatchKeyGuard exclusion(kAcapDispatchKey);
std::stringstream msg;
msg << "Cannot return a tensor that is not from the capture context: ";
msg << tensor;
throw std::invalid_argument(msg.str());
}
returnsTypes.push_back(mlirValueGetType(v));
returnsValues.push_back(v);
}
// TODO: Get location from traceback.
MlirLocation loc = mlirLocationUnknownGet(funcBuilder->getContext());
OperationStateHolder s("std.return", loc);
mlirOperationStateAddOperands(&s.state, returnsValues.size(),
returnsValues.data());
funcBuilder->getEntryBlockBuilder().insertBeforeTerminator(
s.createOperation());
funcBuilder->rewriteFuncReturnTypes(returnsTypes);
hasReturned = true;
}
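
// Takes (and clears) the accumulated capture log entries.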
std::vector<std::string> AcapController::getDebugLog() {
std::vector<std::string> copy;
captureLog.swap(copy);
return copy;
}
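
// Returns the controller at the front of this thread's activation stack, or
// nullptr when no capture context is active.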
std::shared_ptr<AcapController> AcapController::getCurrent() {
auto &stack = getThreadLocalActiveStack();
if (stack.empty())
return nullptr;
return stack.front().controller;
}
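
// Throws if returns() has already been called; no further operations may be
// traced afterwards.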
void AcapController::verifyHasNotReturned() {
if (hasReturned) {
throw std::runtime_error(
"Function has already returned. Cannot trace more operations.");
}
}
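
// Boxed fallback kernel registered for the capture dispatch key. Routes the
// call to the active controller when one is present; otherwise it passes the
// op straight through to the real kernel.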
/* static */
void AcapController::fallbackKernel(const OperatorHandle &opHandle,
Stack *stack) {
  auto current = getCurrent();
  if (!current) {
    // No capture context is active on this thread: call straight through to
    // the real kernel rather than dispatching through the null controller.
    c10::impl::ExcludeDispatchKeyGuard exclusion(kAcapDispatchKey);
    c10::Dispatcher::singleton().callBoxed(opHandle, stack);
    return;
  }
  current->fallbackKernelImpl(opHandle, stack);
}
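
// Invokes the original kernel via the dispatcher, excluding this kernel's
// dispatch key so the call cannot recurse back into the capture machinery.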
void AcapController::redispatch(const c10::OperatorHandle &opHandle,
c10::Stack *stack) {
// Exclude recursive dispatch to this kernel.
c10::impl::ExcludeDispatchKeyGuard exclusion(kAcapDispatchKey);
// Passthrough.
auto &dispatcher = c10::Dispatcher::singleton();
dispatcher.callBoxed(opHandle, stack);
}
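
// Captures a single kernel call: maps stack arguments to MLIR operands
// (before redispatch mutates the stack), invokes the real kernel, maps the
// results back, and emits a torch.kernel_call op into the function under
// construction.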
void AcapController::fallbackKernelImpl(const OperatorHandle &opHandle,
Stack *stack) {
verifyHasNotReturned();
// Exclude recursive dispatch to this kernel.
c10::impl::ExcludeDispatchKeyGuard exclusion(kAcapDispatchKey);
const FunctionSchema &schema = opHandle.schema();
// Check for unsupported.
if (schema.is_vararg() || schema.is_varret()) {
throw std::invalid_argument(
"Cannot capture ops with variable arguments or returns");
}
// TODO: Extract actual location from stack.
MlirContext context = funcBuilder->getContext();
MlirLocation loc = mlirLocationUnknownGet(context);
OperationStateHolder stateHolder("torch.kernel_call", loc);
// Add the kernel_name attribute.
auto kernelName = schema.name();
MlirNamedAttribute kernelNameAttr = mlirNamedAttributeGet(
"kernel_name",
mlirStringAttrGet(context, kernelName.size(), kernelName.data()));
mlirOperationStateAddAttributes(&stateHolder.state, 1, &kernelNameAttr);
// Map arguments to operands.
// This must be accumulated into the OperationState prior to re-dispatch
// since the stack is modified at that point.
size_t argCount = schema.arguments().size();
assert(stack->size() >= argCount && "stack too short");
llvm::SmallVector<MlirValue, 4> operands;
for (auto argIt = stack->end() - argCount; argIt != stack->end(); ++argIt) {
MlirValue mlirValue = mapIValueToMlirValue(loc, *argIt);
if (mlirValueIsNull(mlirValue)) {
std::stringstream out;
out << "Unsupported capture value passed to kernel (" << argIt->tagKind()
<< "): " << *argIt;
throw std::invalid_argument(out.str());
}
operands.push_back(mlirValue);
}
mlirOperationStateAddOperands(&stateHolder.state, operands.size(),
operands.data());
// Invoke the original kernel.
redispatch(opHandle, stack);
// Map returns to results.
size_t returnCount = schema.returns().size();
assert(stack->size() >= returnCount && "stack too short");
llvm::SmallVector<MlirType, 4> resultTypes;
llvm::SmallVector<std::pair<size_t, at::Tensor>, 4> resultIndexToTensorMap;
for (auto returnIt = stack->end() - returnCount; returnIt != stack->end();
++returnIt) {
size_t resultIndex = resultTypes.size();
MlirType resultType = mapIValueToMlirType(loc, *returnIt);
if (mlirTypeIsNull(resultType)) {
std::stringstream out;
out << "Unsupported capture value returned from kernel ("
<< returnIt->tagKind() << "): " << *returnIt;
throw std::invalid_argument(out.str());
}
resultTypes.push_back(resultType);
if (returnIt->isTensor()) {
resultIndexToTensorMap.emplace_back(resultIndex, returnIt->toTensor());
}
}
mlirOperationStateAddResults(&stateHolder.state, resultTypes.size(),
resultTypes.data());
// Create operation.
MlirOperation op = stateHolder.createOperation();
funcBuilder->getEntryBlockBuilder().insertBeforeTerminator(op);
// Map result tensors.
for (auto &it : resultIndexToTensorMap) {
MlirValue result = mlirOperationGetResult(op, it.first);
funcBuilder->mapTensor(it.second, result);
}
// Add to debug log.
std::stringstream sout;
sout << "CAPTURE: " << opHandle.schema() << "\n";
captureLog.push_back(sout.str());
}
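
// Maps a kernel-stack IValue to an MLIR value, returning a null value for
// unsupported kinds. Currently only scalars and previously captured tensors
// are handled.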
MlirValue AcapController::mapIValueToMlirValue(MlirLocation loc,
c10::IValue &ival) {
if (ival.isScalar()) {
return funcBuilder->getScalarConstant(loc, ival.toScalar());
}
if (ival.isTensor()) {
    // Return the value this tensor was previously captured as, if any.
    MlirValue mappedValue = funcBuilder->lookupTensor(ival.toTensor());
    if (!mlirValueIsNull(mappedValue)) {
      return mappedValue;
    }
throw std::invalid_argument(
"TODO: implement tensor import for non-arg tensors");
}
return {nullptr};
// TODO: Implement mappings for the whole set (relevant to this use case):
// _(None)
// _(Tensor)
// _(Double)
// _(Int)
// _(Bool)
// _(Tuple)
// _(String)
// _(Blob)
// _(GenericList)
// _(GenericDict)
// _(Future)
// _(Device)
// _(Object)
// _(PyObject)
// _(Uninitialized)
// _(Capsule)
// _(RRef)
// _(Generator)
}
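
// Maps a kernel-stack IValue to an MLIR type, returning a null type for
// unsupported kinds (currently scalars and tensors are handled).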
MlirType AcapController::mapIValueToMlirType(MlirLocation loc,
c10::IValue &ival) {
if (ival.isScalar()) {
return typeMapper.mapScalarType(ival.toScalar().type());
}
if (ival.isTensor()) {
return typeMapper.forwardTensorToType(ival.toTensor());
}
return {nullptr};
}
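
// Registers the boxed fallback kernel for the capture dispatch key across
// all operator namespaces (the `_` wildcard).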
TORCH_LIBRARY_IMPL(_, ACAP_DISPATCH_KEY, m) {
m.fallback(torch::CppFunction::makeFromBoxedFunction<
&AcapController::fallbackKernel>());
}