# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os

import torch

from mlir.ir import *
from mlir.passmanager import *
from npcomp.compiler.utils import logging

__all__ = [
    "lower_object_graph",
    "lower_module",
]

# The set of passes that lowers from a TorchScript object graph representation
# to module semantics, where symbols correspond to dotted paths into the
# module.
OBJECT_GRAPH_LOWERING_PASSES = (
    # Globalize the program. The rest of the compiler assumes a globalized
    # program, which makes all analyses and transforms significantly easier
    # to write.
    "torch-globalize-pipeline",
    # `symbol-dce` is currently needed for correctness: we don't yet have a
    # backend lowering for torch.global_slot, and Torch usually inserts a few
    # unused global slots that would otherwise get in the way.
    # TODO: Support global slots in backends.
    "symbol-dce",
    # Currently, our shape inference is not powerful enough to deal with
    # calls, so inline everything.
    # TODO: Improve shape inference.
    "inline",
    # Incorporate user annotations and remove signature Python-isms.
    "torch-adjust-calling-conventions",
)
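
# A rough sketch of how this tuple is consumed (the real driver is
# lower_object_graph below): the pass names are joined into one textual
# pipeline and handed to the MLIR pass manager. Assuming `m` is an
# already-imported Module:
#
#   pm = PassManager.parse(",".join(OBJECT_GRAPH_LOWERING_PASSES))
#   pm.run(m)
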
TORCH_TO_TCP_PASSES = (
    # Recognize ATen kernels.
    "func(aten-recognize-kernels)",

    # Convert the bulk of the program to ranked tensors with known dtype.
    # This is the input to the backend layer that we are aiming for.

    # First, unilaterally convert public functions to tensor.
    # The way this pass is currently written, this implies that, as pipeline
    # authors, we prevent our users from observing updates to "out params" on
    # their public functions. This is deemed ok for now.
    "numpy-public-functions-to-tensor",
    # Convert the bulk of non-ABI-visible arrays to tensors.
    "func(numpy-array-to-tensor)",
    # Do shape and dtype refinement.
    # We could do it sooner, but the pass currently doesn't have transfer
    # functions for array ops.
    "func(torch-refine-types)",
    # Propagate to ABI return types the shape/dtype information discovered by
    # the previous pass. Doing this is ABI-compatible for our backends.
    "numpy-refine-public-return",
    # Clean up a few stray array/tensor conversion remnants.
    "func(numpy-array-to-tensor)",

    # Lower to TCP (+ guards), which is the input to codegen backends.
    # Most of this should be subsumed by aten->linalg+guards conversions.
    # (The guard generation will be automated from the linalg op DSL.)
    "func(convert-aten-to-linalg)",
    "func(convert-aten-to-tcf)",
    "func(convert-tcf-to-std)",
    "func(convert-elementwise-to-linalg)",
    # Verify that the result satisfies the contract we have with backends.
    # This is phrased as an allowlist of allowed ops/types, to keep the
    # interface tight and give good diagnostics instead of backend crashes.
    "npcomp-verify-backend-contract",
)
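
# The same textual pipeline can, in principle, be exercised outside of Python
# with the repo's opt-style command line tool. This is a hedged sketch: the
# tool name and flag follow standard MLIR conventions and are an assumption
# here, not something this file guarantees.
#
#   npcomp-opt -pass-pipeline='func(aten-recognize-kernels)' input.mlir

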
def lower_module(imported_module: Module):
  """Compiles an imported module with a flat list of functions.

  Args:
    imported_module: The MLIR module consisting of funcs and globals in
      the torch dialect. It is lowered in place.

  Returns:
    The imported_module, for convenience when chaining method calls.
  """
  with imported_module.context as context:
    if logging.debug_enabled():
      logging.debug("Initial PyTorch IR:\n{}", imported_module)
    # Frontend.
    pipeline_str = ",".join(TORCH_TO_TCP_PASSES)
    if logging.debug_enabled():
      logging.debug("Running Torch->TCP pipeline '{}'", pipeline_str)
    pm = PassManager.parse(pipeline_str)
    pm.run(imported_module)
    if logging.debug_enabled():
      logging.debug("TCP IR:\n{}", imported_module)
    return imported_module
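

# A minimal usage sketch for lower_module (hedged: `imported` stands in for a
# Module that the frontend has already converted to the torch dialect with a
# flat list of functions):
#
#   lowered = lower_module(imported)  # runs TORCH_TO_TCP_PASSES in place
#   assert lowered is imported        # same object, returned for chaining

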
def lower_object_graph(imported_module: Module):
  """Lowers an imported module that has TorchScript object graph semantics.

  Args:
    imported_module: The MLIR module consisting of IR as imported by
      torch_mlir.import_module. It is lowered in place.

  Returns:
    The imported_module, for convenience when chaining method calls.
  """
  with imported_module.context as context:
    if logging.debug_enabled():
      logging.debug("Initial PyTorch object graph IR:\n{}", imported_module)

    # Object graph lowering.
    pipeline_str = ",".join(OBJECT_GRAPH_LOWERING_PASSES)
    if logging.debug_enabled():
      logging.debug(
          "Running Torch object graph lowering pipeline '{}'", pipeline_str)
    pm = PassManager.parse(pipeline_str)
    pm.run(imported_module)
    return lower_module(imported_module)
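

# End-to-end usage sketch. Hedged: the exact frontend import API below is an
# assumption for illustration; the docstring above only promises that the
# input is a Module as produced by torch_mlir.import_module.
#
#   import torch
#   import torch_mlir
#
#   scripted = torch.jit.script(MyModule())  # MyModule: any torch.nn.Module
#   mb = torch_mlir.ModuleBuilder()          # hypothetical builder API
#   mb.import_module(scripted._c)            # hypothetical import entry point
#   lowered = lower_object_graph(mb.module)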