2020-11-11 13:38:13 +08:00
|
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
2020-11-25 11:02:50 +08:00
|
|
|
import torch
|
|
|
|
|
2020-11-11 13:38:13 +08:00
|
|
|
from mlir.ir import *
|
|
|
|
from mlir.passmanager import *
|
|
|
|
from npcomp.compiler.generic.backend import refjit as refjit_backend
|
|
|
|
from npcomp.compiler.utils import logging
|
|
|
|
|
|
|
|
# Public API of this module.
__all__ = [
    "is_enabled",
    "CompilerBackend",
]
|
|
|
|
|
2021-03-20 05:08:04 +08:00
|
|
|
# The set of passes that lowers from a TorchScript object graph representation
# to a module semantics where symbols correspond to dotted paths into the
# module. The entries are joined with "," to form a textual pass pipeline.
OBJECT_GRAPH_LOWERING_PASSES = (
    # Globalize the program. The rest of the compiler assumes a globalized
    # program, which makes all analyses and transforms significantly easier
    # to write.
    "torch-globalize-pipeline",
    # symbol-dce is currently needed for correctness, as we don't have a
    # lowering in the backend for torch.global_slot's.
    # Torch usually inserts a few unused global slots that are otherwise
    # bothersome because we don't currently have a lowering for them.
    # TODO: Support global slots in backends.
    "symbol-dce",
    # Incorporate user annotations and remove signature Python-isms.
    "torch-adjust-calling-conventions",
)
|
|
|
|
|
2021-04-07 06:58:37 +08:00
|
|
|
# TODO: Replace this with lowering to "TCP + guards" -- that's the real
# backend interface. Put differently, add "TCF to TCP" to the end of this
# pipeline.
#
# The entries are joined with "," to form a textual pass pipeline, so their
# order is the order in which the passes run.
TORCH_TO_TCF_PASSES = (
    # Recognize ATen kernels.
    "func(aten-recognize-kernels)",

    # Convert the bulk of the program to ranked tensors with known dtype.
    # This is the input to the backend layer that we are aiming for.

    # First, unilaterally convert public functions to tensor.
    # The way this pass is currently written, this implies that
    # as pipeline authors, we are restricting our users to not be able to see
    # updates to "out params" on their public functions.
    # This is deemed ok for now.
    "numpy-public-functions-to-tensor",
    # Convert the bulk of non-ABI-visible arrays to tensors.
    "func(numpy-array-to-tensor)",
    # Do shape and dtype refinement.
    # We could do it sooner, but the pass currently doesn't have transfer
    # functions for array ops.
    "func(torch-refine-types)",
    # Propagate to ABI return types the shape/dtype information discovered by
    # the previous pass. Doing this is ABI-compatible for our backends.
    "numpy-refine-public-return",
    # Clean up a few stray array/tensor conversion remnants.
    "func(numpy-array-to-tensor)",

    # Lower to TCF which is the input to RefBackend.
    # Most of this pass should be subsumed by aten->linalg+guards conversions.
    # (the guard generation will be automated from the linalg Op DSL)
    "func(convert-aten-to-tcf)",
)
|
2020-11-11 13:38:13 +08:00
|
|
|
|
|
|
|
# Re-export refjit_backend.is_enabled under this module's namespace (it is
# listed in __all__).
is_enabled = refjit_backend.is_enabled
|
|
|
|
|
|
|
|
|
2020-11-25 11:02:50 +08:00
|
|
|
class TorchJitModuleInvoker(refjit_backend.JitModuleInvoker):
  """Allows torch.Tensor inputs to be passed to module invocations.

  Looking up a function by name returns a callable that converts every
  positional torch.Tensor argument to a numpy array and then delegates to
  the callable returned by the base class lookup. Arguments that are not
  torch.Tensor instances are forwarded unchanged.
  """

  def __getitem__(self, function_name: str):
    """Returns a torch.Tensor-accepting wrapper for `function_name`.

    Args:
      function_name: Name forwarded to the base class `__getitem__`.
    Returns:
      A callable taking positional arguments only; its result is whatever
      the underlying base class callable returns.
    """
    numpy_invoke = super().__getitem__(function_name)

    def invoke(*args):
      # Tensor.numpy() raises for tensors that require grad, so detach first.
      # detach() does not copy: the resulting array still shares storage with
      # the tensor that was passed in.
      converted = tuple(
          arg.detach().numpy() if isinstance(arg, torch.Tensor) else arg
          for arg in args)
      return numpy_invoke(*converted)

    return invoke
|
|
|
|
|
|
|
|
|
2020-11-11 13:38:13 +08:00
|
|
|
class CompilerBackend:
  """Main entry-point for the backend."""

  def __init__(self):
    """Acquires the refjit backend handle and caches the debug flag."""
    super().__init__()
    self._refjit = refjit_backend.get_refjit()
    # Sampled once at construction; later changes to the logging
    # configuration are not observed by this instance.
    self._debug = logging.debug_enabled()

  def compile(self, imported_module: Module):
    """Compiles an imported module, with a flat list of functions.

    The pass pipelines are run on `imported_module` itself; the same object
    is then handed to the JIT module constructor.

    Args:
      imported_module: The MLIR module consisting of funcs in the torch
        dialect.
    Returns:
      An opaque, backend specific module object that can be passed to load.
      The object may actually be something more specific to the backend (i.e.
      for IREE, it is a serialized VM flatbuffer) but the contract is that
      it is operated on by methods on this class.
    """
    with imported_module.context:
      if self._debug:
        logging.debug("Initial PyTorch IR:\n{}", imported_module)

      # Frontend.
      pipeline_str = ",".join(TORCH_TO_TCF_PASSES)
      if self._debug:
        logging.debug("Running Torch->TCF pipeline '{}'", pipeline_str)
      pm = PassManager.parse(pipeline_str)
      pm.run(imported_module)
      if self._debug:
        logging.debug("TCF IR:\n{}", imported_module)

      # Backend.
      # Note that this is a separate pass manager purely to aid in debugging.
      pm = PassManager()
      self._refjit.build_backend_compilation_pipeline(pm)
      pm.run(imported_module)
      if self._debug:
        logging.debug("Backend IR:\n{}", imported_module)

      jit_module = self._refjit.JITModule.from_compiled_module(
          imported_module, refjit_backend.get_runtime_libs())
      return jit_module

  def compile_object_graph(self, imported_module: Module):
    """Compiles an imported module, with TorchScript object graph semantics.

    Runs the object graph lowering pipeline on the module and then hands it
    to `compile`.

    Args:
      imported_module: The MLIR module consisting of IR as imported by the
        torch_mlir.import_module
    Returns:
      An opaque, backend specific module object that can be passed to load.
      The object may actually be something more specific to the backend (i.e.
      for IREE, it is a serialized VM flatbuffer) but the contract is that
      it is operated on by methods on this class.
    """
    with imported_module.context:
      if self._debug:
        logging.debug("Initial PyTorch object graph IR:\n{}", imported_module)

      # Frontend.
      pipeline_str = ",".join(OBJECT_GRAPH_LOWERING_PASSES)
      if self._debug:
        logging.debug(
            "Running Torch object graph lowering pipeline '{}'", pipeline_str)
      pm = PassManager.parse(pipeline_str)
      pm.run(imported_module)
      # The lowered IR is logged by compile() as its "Initial PyTorch IR".
      return self.compile(imported_module)

  def load(self, jit_module) -> TorchJitModuleInvoker:
    """Loads a compiled artifact into the runtime.

    Args:
      jit_module: The object returned by `compile` or `compile_object_graph`.
    Returns:
      A TorchJitModuleInvoker wrapping `jit_module`.
    """
    return TorchJitModuleInvoker(jit_module)
|