torch-mlir/include/npcomp/Dialect/Torch/IR/TorchOps.h

//===------------------------------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H
#define NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H

#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/CastInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "npcomp/Dialect/Torch/IR/OpInterfaces.h"
#include "npcomp/Dialect/Torch/IR/TorchTypes.h"
#include "npcomp/Interfaces/Traits.h"

#define GET_OP_CLASSES
#include "npcomp/Dialect/Torch/IR/TorchOps.h.inc"

/// DenseMapInfo specialization for Torch::SlotOp. Every operation is
/// expressed in terms of the op's opaque pointer, which lets the op act as a
/// key in LLVM's dense hashed containers.
template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::SlotOp> {
  using SlotOp = ::mlir::NPCOMP::Torch::SlotOp;

  /// Wraps the `void *` empty-key sentinel in a SlotOp.
  static SlotOp getEmptyKey() {
    return SlotOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getEmptyKey());
  }

  /// Wraps the `void *` tombstone-key sentinel in a SlotOp.
  static SlotOp getTombstoneKey() {
    return SlotOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getTombstoneKey());
  }

  /// Hashes the op through its opaque pointer.
  static unsigned getHashValue(SlotOp op) {
    const auto *opaque = op.getAsOpaquePointer();
    return hash_value(opaque);
  }

  /// Two keys match when the ops compare equal with `operator==`.
  static bool isEqual(SlotOp a, SlotOp b) { return a == b; }
};

/// DenseMapInfo specialization for Torch::NnModuleOp. Every operation is
/// expressed in terms of the op's opaque pointer, which lets the op act as a
/// key in LLVM's dense hashed containers.
template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::NnModuleOp> {
  using NnModuleOp = ::mlir::NPCOMP::Torch::NnModuleOp;

  /// Wraps the `void *` empty-key sentinel in an NnModuleOp.
  static NnModuleOp getEmptyKey() {
    return NnModuleOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getEmptyKey());
  }

  /// Wraps the `void *` tombstone-key sentinel in an NnModuleOp.
  static NnModuleOp getTombstoneKey() {
    return NnModuleOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getTombstoneKey());
  }

  /// Hashes the op through its opaque pointer.
  static unsigned getHashValue(NnModuleOp op) {
    const auto *opaque = op.getAsOpaquePointer();
    return hash_value(opaque);
  }

  /// Two keys match when the ops compare equal with `operator==`.
  static bool isEqual(NnModuleOp a, NnModuleOp b) { return a == b; }
};

/// DenseMapInfo specialization for Torch::ClassTypeOp. Every operation is
/// expressed in terms of the op's opaque pointer, which lets the op act as a
/// key in LLVM's dense hashed containers.
template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::ClassTypeOp> {
  using ClassTypeOp = ::mlir::NPCOMP::Torch::ClassTypeOp;

  /// Wraps the `void *` empty-key sentinel in a ClassTypeOp.
  static ClassTypeOp getEmptyKey() {
    return ClassTypeOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getEmptyKey());
  }

  /// Wraps the `void *` tombstone-key sentinel in a ClassTypeOp.
  static ClassTypeOp getTombstoneKey() {
    return ClassTypeOp::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getTombstoneKey());
  }

  /// Hashes the op through its opaque pointer.
  static unsigned getHashValue(ClassTypeOp op) {
    const auto *opaque = op.getAsOpaquePointer();
    return hash_value(opaque);
  }

  /// Two keys match when the ops compare equal with `operator==`.
  static bool isEqual(ClassTypeOp a, ClassTypeOp b) { return a == b; }
};

/// DenseMapInfo specialization for Torch::GlobalSlotOp. Every operation is
/// expressed in terms of the op's opaque pointer, which lets the op act as a
/// key in LLVM's dense hashed containers.
template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::GlobalSlotOp> {
  using OpTy = ::mlir::NPCOMP::Torch::GlobalSlotOp;

  /// Wraps the `void *` empty-key sentinel in a GlobalSlotOp.
  static OpTy getEmptyKey() {
    return OpTy::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getEmptyKey());
  }

  /// Wraps the `void *` tombstone-key sentinel in a GlobalSlotOp.
  static OpTy getTombstoneKey() {
    return OpTy::getFromOpaquePointer(
        llvm::DenseMapInfo<void *>::getTombstoneKey());
  }

  /// Hashes the op through its opaque pointer.
  static unsigned getHashValue(OpTy op) {
    const auto *opaque = op.getAsOpaquePointer();
    return hash_value(opaque);
  }

  /// Two keys match when the ops compare equal with `operator==`.
  static bool isEqual(OpTy a, OpTy b) { return a == b; }
};

#endif // NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H
Add boilerplate for Torch dialect. 2020-09-29 03:02:35 +08:00			`//===------------------------------------------------------------*- C++ -*-===//`
			`//`
			`// This file is licensed under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#ifndef NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H`
			`#define NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H`

Bump llvm-project to 444822d77a7fea28aa49edf24533c987efa1b2ee Fixes: - renames StandardTypes -> BuiltinTypes - std.extract_element -> tensor.extract 2020-12-12 06:43:38 +08:00			`#include "mlir/IR/BuiltinTypes.h"`
Add boilerplate for Torch dialect. 2020-09-29 03:02:35 +08:00			`#include "mlir/IR/OpDefinition.h"`
			`#include "mlir/IR/OpImplementation.h"`
Add initial TorchScript module importer It turns out that this was easiest to structure as a general IValue importer, since torch module are just one of the possible IValue's. We import the IValue object graph in a braindead fashion into basicpy ops and a new `torch.nn_module` op that is used to model the attributes/methods of a torch::jit::Module IValue. See `Torch/ops.mlir` for an example, and also check out the .py import tests in `frontends/pytorch/test/module_import`. As part of this change, a few housekeeping tasks: - extract some helpers from graph_importer.cpp - more helpers around the C API - misc touchups 2021-01-28 08:35:44 +08:00			`#include "mlir/IR/SymbolTable.h"`
Add basic MLP's to the e2e curriculum. These tests pass on the reference backend. - Add aten.linear op + shape xfer function + ATen->Linalg lowering. - Note: this needs to be more automated, and needs to cover more cases. - Current not implemented caveats: - size-1 broadcasting for bias vector (either static-size-1 or ? case) - higher-rank aten.linear ops (not produced by torch.nn.Linear though) - type promotion (still don't even know the exact rules here) - Add folder for torch.derefine op. Now the inliner can clean it up as it inlines. (call boundaries are a main place we need to insert torch.derefine) This is brittle -- the other important case is control flow which will need to be handled via an extension to RefineTypes.cpp (as will more robust call handling). River has an in-flight patch to update it to the new dataflow framework so I didn't want to do anything intrusive here. - Also adjust torch.derefine syntax to use the keyword `to` instead of `->`, as most type-only, cast-like ops do. 2021-04-27 02:42:41 +08:00			`#include "mlir/Interfaces/CastInterfaces.h"`
Add support for prim::Loop op. This is a funny one. It combines a `for` and `while` loop in one op. We will need to write some conversions to `scf`. 2021-03-02 07:00:32 +08:00			`#include "mlir/Interfaces/ControlFlowInterfaces.h"`
Properly model "derefinement". In terms of IR structure, TorchScript allows types to vary in many circumstances where MLIR requires pointer-identical types. In particular, it is valid to pass any subtype in place of a type. For example, if an `Optional[int]` is required somewhere in the IR, it is legal to pass a value of just `int` (but not the other way around; see `torch.prim.unchecked_cast`). In effect, every use can have a different type. We introduce a new op `torch.derefine` that models that impedance mismatch. This op allows casting a value from one type to a type that it is a subtype of to model this behavior. Recommended review order: - TorchOps.td for new torch.derefine (and updated docs for `torch.prim.unchecked_cast`) - new test code in if.py, loop.py, function-derefine.py - new code in node_importer.cpp for handling derefinement insertion - function_importer.cpp and utils changes in torch_to_mlir_utils.cpp Properly handling derefinement on function boundaries required relayering the code so that graph_importer.cpp/.h is now function_importer.cpp/.h because only the `torch::jit::Function` (actually the `c10::FunctionSchema` it holds) knows the derefined types that are actually needed at the boundary (see `function-derefine.py` for a test). Annoyingly, this churns all the functions which are now prefixed with `__torch__.` but that is more correct anyway (that is their linkage name in the `torch::jit::CompilationUnit`; the previous `mb.import_function` was actually buggy in the case of functions calling each other as it would reference their unqualified name). With this change, we can import `resnet18` from `torchvision` :) IR: https://gist.github.com/silvasean/6426a5272d8a6c7caae533fce05ab704 2021-03-02 09:24:15 +08:00			`#include "mlir/Interfaces/SideEffectInterfaces.h"`
Expose signature metadata to ops and implement ATenRecognizeKernelsPass pass. * Two op interfaces, one for querying instance metadata and one for getting static data needed to construct an op from a generic form. * For torch.generic_kernel ops, metadata is splatted in during capture from Torch (it comes from the op registry, which will work for either device capture or graph import). * Moved the 'add' out of the generated set so I can experiment on it. It implements the TorchBuildableKernelOpInterface interface which provides its metadata. * The ATenRecognizeKernelsPass pass generically lowers from a torch.generic_kernel to recognized ops that implement the TorchBuildableKernelOpInterface, handling the various types of transformations that we allow at this stage. 2020-10-23 14:31:34 +08:00			`#include "npcomp/Dialect/Torch/IR/OpInterfaces.h"`
Add initial TorchScript module importer It turns out that this was easiest to structure as a general IValue importer, since torch module are just one of the possible IValue's. We import the IValue object graph in a braindead fashion into basicpy ops and a new `torch.nn_module` op that is used to model the attributes/methods of a torch::jit::Module IValue. See `Torch/ops.mlir` for an example, and also check out the .py import tests in `frontends/pytorch/test/module_import`. As part of this change, a few housekeeping tasks: - extract some helpers from graph_importer.cpp - more helpers around the C API - misc touchups 2021-01-28 08:35:44 +08:00			`#include "npcomp/Dialect/Torch/IR/TorchTypes.h"`
Add AllowsTypeRefinement trait and use it to improve RefineTypes This trait lets us model the semantics of various aten/torch/numpy ops that are insensitive to type refinements. This replaces hardcoded/inconsistent checks for this property. To show usage of this new trait, we fix up some old uses, and improve RefineTypes to be smarter about rewriting with this trait. 2021-04-29 05:36:46 +08:00			`#include "npcomp/Interfaces/Traits.h"`
Add boilerplate for Torch dialect. 2020-09-29 03:02:35 +08:00
			`#define GET_OP_CLASSES`
			`#include "npcomp/Dialect/Torch/IR/TorchOps.h.inc"`

Support multiple instances of a class in GlobalizeObjectGraph. This happens in practice with e.g. ResNet from torchvision (multiple instances of the same BatchNorm class). The key observation is that for this program, and the expected set of programs, we can convert the program to the same globalized form with a bit more static analysis and effort to suitably monomorphize the program. Though what we are doing here is fairly annoying to implement, it saves any nontrivial later pass from having to do similar analyses (or worse). E.g. shape inference would need to be object-graph aware, mutation/lifetime analyses would have to be aware, etc. Additionally, it would make us front-load what it means to have a !torch.nn.Module type on an ABI boundary, which we are just not ready to handle. I'm really, really hoping that in practice we can get away with this, otherwise it's going to be really rough designing a representation (and implementing everything to back it) that is convenient to transform and gracefully scales from full object graph (in the most dynamic case) down to a fixed set of global slots like we have here (in the most static case, which we presume a lot of practical programs fall into). This also involved introducing a `torch-prepare-for-globalize-object-graph` pass that does a minimal set of lowerings to simplify the IR into a more orthogonal and analyzable form, and a `torch-globalize-pipeline` helper. Recommended review order: - updated documentation in Passes.td - new tests in `globalize-object-graph-multiple-instances*.mlir` - implementation of GlobalizeObjectGraph.cpp - PrepareForGlobalizeObjectGraph.cpp + prepare-for-globalize-object-graph.mlir - misc stuff like torch-globalize-pipeline pipeline definition. With this, we can import, globalize, and inline resnet18 from torchvision: https://gist.github.com/silvasean/821586afc19b67d9fb72030b2e0adeb8 2021-03-10 12:33:21 +08:00			`template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::SlotOp> {`
			`using SlotOp = ::mlir::NPCOMP::Torch::SlotOp;`
			`static SlotOp getEmptyKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getEmptyKey();`
			`return SlotOp::getFromOpaquePointer(pointer);`
			`}`
			`static SlotOp getTombstoneKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getTombstoneKey();`
			`return SlotOp::getFromOpaquePointer(pointer);`
			`}`
			`static unsigned getHashValue(SlotOp val) {`
			`return hash_value(val.getAsOpaquePointer());`
			`}`
			`static bool isEqual(SlotOp lhs, SlotOp rhs) { return lhs == rhs; }`
			`};`

			`template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::NnModuleOp> {`
			`using NnModuleOp = ::mlir::NPCOMP::Torch::NnModuleOp;`
			`static NnModuleOp getEmptyKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getEmptyKey();`
			`return NnModuleOp::getFromOpaquePointer(pointer);`
			`}`
			`static NnModuleOp getTombstoneKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getTombstoneKey();`
			`return NnModuleOp::getFromOpaquePointer(pointer);`
			`}`
			`static unsigned getHashValue(NnModuleOp val) {`
			`return hash_value(val.getAsOpaquePointer());`
			`}`
			`static bool isEqual(NnModuleOp lhs, NnModuleOp rhs) { return lhs == rhs; }`
			`};`

			`template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::ClassTypeOp> {`
			`using ClassTypeOp = ::mlir::NPCOMP::Torch::ClassTypeOp;`
			`static ClassTypeOp getEmptyKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getEmptyKey();`
			`return ClassTypeOp::getFromOpaquePointer(pointer);`
			`}`
			`static ClassTypeOp getTombstoneKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getTombstoneKey();`
			`return ClassTypeOp::getFromOpaquePointer(pointer);`
			`}`
			`static unsigned getHashValue(ClassTypeOp val) {`
			`return hash_value(val.getAsOpaquePointer());`
			`}`
			`static bool isEqual(ClassTypeOp lhs, ClassTypeOp rhs) { return lhs == rhs; }`
			`};`

Add InlineGlobalSlots pass. This inlines global slots if possible. This allows them to participate in folding, canonicalization, shape inference, etc. Example use cases: - inlining weights and biases that are readonly during inference - inlining the "training" bool to allow stuff to fold away For training use cases (especially internal training loop), we will need something smarter to get good performance. That would look like an "SSA formation" which promotes the global slots to tensors in the program, flushing them back to the slots at the minimal number of necessary places. We might want to let backends do that transformation though. This also interacts with shape inference (type bounds on the slots to even lower them to backends in the first place). 2021-04-24 04:35:44 +08:00			`template <> struct llvm::DenseMapInfo<::mlir::NPCOMP::Torch::GlobalSlotOp> {`
			`using OpTy = ::mlir::NPCOMP::Torch::GlobalSlotOp;`
			`static OpTy getEmptyKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getEmptyKey();`
			`return OpTy::getFromOpaquePointer(pointer);`
			`}`
			`static OpTy getTombstoneKey() {`
			`auto *pointer = llvm::DenseMapInfo<void *>::getTombstoneKey();`
			`return OpTy::getFromOpaquePointer(pointer);`
			`}`
			`static unsigned getHashValue(OpTy val) {`
			`return hash_value(val.getAsOpaquePointer());`
			`}`
			`static bool isEqual(OpTy lhs, OpTy rhs) { return lhs == rhs; }`
			`};`

Add boilerplate for Torch dialect. 2020-09-29 03:02:35 +08:00			`#endif // NPCOMP_DIALECT_TORCH_IR_TORCHOPS_H`