# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os

import torch

from mlir.ir import *
from mlir.passmanager import *
from npcomp.compiler.utils import logging

__all__ = [
    "lower_object_graph",
    "lower_module",
]

# The set of passes that lowers from a TorchScript object graph representation
# to a module semantics where symbols correspond to dotted paths into the
# module.
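# For example, after these passes the attribute `weight` of a submodule bound
# as `self.classifier` is referenced through a symbol named by its dotted path,
# roughly `classifier.weight` (illustrative naming only; the exact symbol
# names are chosen by the globalization passes).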
OBJECT_GRAPH_LOWERING_PASSES = (
    # When we import TorchScript IR, we import the entire "compilation unit",
    # which can contain numerous functions unrelated to the current program,
    # which break torch-globalize-pipeline; for example, there can be
    # random functions referencing types that haven't been imported
    # as part of the root `torch.nn.Module` we imported. Those will
    # be unreferenced private functions which symbol-dce will clean up nicely.
    "symbol-dce",
    # Globalize the program. The rest of the compiler assumes a globalized
    # program, which makes all analyses and transforms significantly easier
    # to write.
    "torch-globalize-pipeline",
# "lower" `torch.global_slot` ops by deleting them if unused, which we
|
|
|
|
# currently require because we don't have a lowering path for backends to
|
|
|
|
# handle them.
|
|
|
|
# Torch usually inserts a few unused global slots so this ends up hitting
|
|
|
|
# every single module even if it doesn't have any explicit slots.
    # TODO: Support global slots in backends.
    "symbol-dce",
    # Currently, our shape inference is not powerful enough to deal with
    # calls, so inline everything.
    # TODO: Improve shape inference.
    "inline",
    # Incorporate user annotations and remove signature Python-isms.
    "torch-adjust-calling-conventions",
)

TORCH_TO_TCP_PASSES = (
    # Recognize ATen kernels.
    "func(aten-recognize-kernels)",

    # Convert the bulk of the program to ranked tensors with known dtype.
    # This is the input to the backend layer that we are aiming for.

    # First, unilaterally convert public functions to tensor.
    # The way this pass is currently written, this implies that, as pipeline
    # authors, we are restricting our users from observing updates to
    # "out params" on their public functions.
    # This is deemed ok for now.
    "numpy-public-functions-to-tensor",
    # Convert the bulk of non-ABI-visible arrays to tensors.
    "func(numpy-array-to-tensor)",
    # Do shape and dtype refinement.
    # We could do it sooner, but the pass currently doesn't have transfer
    # functions for array ops.
    "func(torch-refine-types)",
    # Propagate to ABI return types the shape/dtype information discovered by
    # the previous pass. Doing this is ABI-compatible for our backends.
    "numpy-refine-public-return",
    # Clean up a few stray array/tensor conversion remnants.
    "func(numpy-array-to-tensor)",

    # Lower to TCP (+ guards), which is the input to codegen backends.
    # Most of this should be subsumed by aten->linalg+guards conversions.
    # (the guard generation will be automated from the linalg Op DSL)
    "func(convert-aten-to-linalg)",
    "func(convert-aten-to-tcf)",
    "func(convert-tcf-to-std)",
    "func(convert-elementwise-to-linalg)",
    "npcomp-verify-backend-contract",
)
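
# Both pass tuples above are fragments of a textual MLIR pass pipeline: the
# entries are joined with commas and handed to PassManager.parse, and entries
# of the form "func(...)" run the nested pass over each function rather than
# over the whole module. The helper below is only a minimal sketch of that
# mechanism, using the same PassManager API as the functions further down; the
# name is hypothetical and it is not part of this module's public API.
def _run_pipeline_sketch(module: Module, passes):
  """Runs the given iterable of pass pipeline fragments on `module` in place."""
  with module.context:
    PassManager.parse(",".join(passes)).run(module)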


def lower_module(imported_module: Module):
  """Compiles an imported module containing a flat list of functions.

  Args:
    imported_module: The MLIR module consisting of funcs and globals in
      the torch dialect. It is lowered in place.
  Returns:
    The imported_module, for convenient call chaining.
  """
  with imported_module.context as context:
    if logging.debug_enabled():
      logging.debug("Initial PyTorch IR:\n{}", imported_module)
    # Frontend.
    pipeline_str = ",".join(TORCH_TO_TCP_PASSES)
    if logging.debug_enabled():
      logging.debug("Running Torch->TCP pipeline '{}'", pipeline_str)
    pm = PassManager.parse(pipeline_str)
    pm.run(imported_module)
    if logging.debug_enabled():
      logging.debug("TCP IR:\n{}", imported_module)
  return imported_module


def lower_object_graph(imported_module: Module):
  """Lowers an imported module that has TorchScript object graph semantics.

  Args:
    imported_module: The MLIR module consisting of IR as imported by
      torch_mlir.import_module. It is lowered in place.
  Returns:
    The imported_module, for convenient call chaining.
  """
  with imported_module.context as context:
    if logging.debug_enabled():
      logging.debug("Initial PyTorch object graph IR:\n{}", imported_module)

    # Object graph lowering.
    pipeline_str = ",".join(OBJECT_GRAPH_LOWERING_PASSES)
    if logging.debug_enabled():
      logging.debug(
          "Running Torch object graph lowering pipeline '{}'", pipeline_str)
    pm = PassManager.parse(pipeline_str)
    pm.run(imported_module)
  return lower_module(imported_module)
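
# Example usage (a minimal, untested sketch): the object graph form is what the
# PyTorch frontend importer produces from a scripted torch.nn.Module, so a
# typical flow looks roughly like the lines below. The ModuleBuilder-based
# importer API is an assumption here (see the frontends/pytorch importer for
# the authoritative interface), and the shape/dtype annotations that the
# backend contract usually needs are omitted for brevity.
#
#   import torch
#   import torch_mlir
#
#   class Add(torch.nn.Module):
#     def forward(self, a, b):
#       return a + b
#
#   mb = torch_mlir.ModuleBuilder()
#   mb.import_module(torch.jit.script(Add())._c)
#   lowered = lower_object_graph(mb.module)
#   print(lowered)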