torch-mlir/python/npcomp/compiler/pytorch/backend/refjit.py

#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
#  See https://llvm.org/LICENSE.txt for license information.
#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os

import torch

from mlir.ir import *
from mlir.passmanager import *
from npcomp.compiler.generic.backend import refjit as refjit_backend
from npcomp.compiler.utils import logging

__all__ = [
    "is_enabled",
    "CompilerBackend",
]

# The set of passes that lowers from a TorchScript object graph representation
# to a module semantics where symbols correspond to dotted paths into the
# module.
OBJECT_GRAPH_LOWERING_PASSES = (
    "torch-globalize-pipeline",
    # symbol-dce is currently needed for correctness, as we don't have a lowering
    # in the backend for torch.global_slot's.
    # Torch usually inserts a few unused global slots that are otherwise
    # bothersome because we don't currently have a lowering for them.
    # TODO: Support global slots in backends.
    "symbol-dce",
    "torch-adjust-calling-conventions",
)

TORCH_TO_TCF_PASSES = (
    "func(aten-recognize-kernels)",
    "func(convert-aten-to-tcf)",
    "numpy-public-functions-to-tensor",
    "canonicalize",
)

# Re-export.
is_enabled = refjit_backend.is_enabled


class TorchJitModuleInvoker(refjit_backend.JitModuleInvoker):
  """Allows torch.Tensor inputs to be passed to module invocations."""

  def __getitem__(self, function_name: str):
    numpy_invoke = super().__getitem__(function_name)

    def invoke(*args):
      args = tuple(
          arg.numpy() if isinstance(arg, torch.Tensor) else arg for arg in args)
      return numpy_invoke(*args)

    return invoke


class CompilerBackend:
  """Main entry-point for the backend."""

  def __init__(self):
    super().__init__()
    self._refjit = refjit_backend.get_refjit()
    self._debug = logging.debug_enabled()

  def compile(self, imported_module: Module):
    """Compiles an imported module, with a flat list of functions.

    Args:
      imported_module: The MLIR module consisting of funcs in the torch
        dialect.
    Returns:
      An opaque, backend specific module object that can be passed to load.
      The object may actually be something more specific to the backend (i.e.
      for IREE, it is a serialized VM flatbuffer) but the contract is that
      it is operated on by methods on this class.
    """
    with imported_module.context as context:
      if self._debug:
        logging.debug("Initial PyTorch IR:\n{}", imported_module)

      # Frontend.
      pipeline_str = ",".join(TORCH_TO_TCF_PASSES)
      if self._debug:
        logging.debug("Running Torch->TCF pipeline '{}'", pipeline_str)
      pm = PassManager.parse(pipeline_str)
      pm.run(imported_module)
      if self._debug:
        logging.debug("TCF IR:\n{}", imported_module)

      # Backend.
      # Note that this is a separate pass manager purely to aid in debugging.
      pm = PassManager()
      self._refjit.build_backend_compilation_pipeline(pm)
      pm.run(imported_module)
      if self._debug:
        logging.debug("Backend IR:\n{}", imported_module)

    jit_module = self._refjit.JITModule.from_compiled_module(
        imported_module, refjit_backend.get_runtime_libs())
    return jit_module

  def compile_object_graph(self, imported_module: Module):
    """Compiles an imported module, with TorchScript object graph semantics.

    Args:
      imported_module: The MLIR module consisting of IR as imported by the
      torch_mlir.import_module
    Returns:
      An opaque, backend specific module object that can be passed to load.
      The object may actually be something more specific to the backend (i.e.
      for IREE, it is a serialized VM flatbuffer) but the contract is that
      it is operated on by methods on this class.
    """
    with imported_module.context as context:
      if self._debug:
        logging.debug("Initial PyTorch object graph IR:\n{}", imported_module)

      # Frontend.
      pipeline_str = ",".join(OBJECT_GRAPH_LOWERING_PASSES)
      if self._debug:
        logging.debug(
            "Running Torch object graph lowering pipeline '{}'", pipeline_str)
      pm = PassManager.parse(pipeline_str)
      pm.run(imported_module)
    return self.compile(imported_module)

  def load(self, jit_module) -> TorchJitModuleInvoker:
    """Loads a compiled artifact into the runtime."""
    return TorchJitModuleInvoker(jit_module)
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00			`# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`# See https://llvm.org/LICENSE.txt for license information.`
			`# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`

			`import os`

Add cos_e2e.py, test_utils and support for tensor inputs (#134) 2020-11-25 11:02:50 +08:00			`import torch`

Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00			`from mlir.ir import *`
			`from mlir.passmanager import *`
			`from npcomp.compiler.generic.backend import refjit as refjit_backend`
			`from npcomp.compiler.utils import logging`

			`__all__ = [`
			`"is_enabled",`
			`"CompilerBackend",`
			`]`

Add ability to compile from object graph ir. 2021-03-20 05:08:04 +08:00			`# The set of passes that lowers from a TorchScript object graph representation`
			`# to a module semantics where symbols correspond to dotted paths into the`
			`# module.`
			`OBJECT_GRAPH_LOWERING_PASSES = (`
			`"torch-globalize-pipeline",`
			`# symbol-dce is currently needed for correctness, as we don't have a lowering`
			`# in the backend for torch.global_slot's.`
			`# Torch usually inserts a few unused global slots that are otherwise`
			`# bothersome because we don't currently have a lowering for them.`
			`# TODO: Support global slots in backends.`
			`"symbol-dce",`
Add torch-adjust-calling-conventions pass. This pass incorporates torch.type_bound info and also removes NoneType returns (eventually it will rewrite tuple types too, but can't yet because !basicpy.TupleType doesn't track element types). Recommend looking at adjust-calling-conventions.mlir first to see what it is doing, and holding your nose for the implementation of the pass. I decided to implement this with the conversion framework, because it gives us some goodies for type conversion -- mainly avoiding large amounts of tricky RAUW dances. Unfortunately, the conversion framework isn't a perfect fit for a couple reasons: - the incorporation of torch.type_bound is a context-sensitive rewrite (requires looking at the arg attr, not just the type). - NoneType conversion is 1->0, which requires some special handling - (not implemented yet) 1->N tuple type conversions require special handling. It's a little bit scary, but on balance doing it the other way would have its own downsides. 2021-04-02 08:36:18 +08:00			`"torch-adjust-calling-conventions",`
Add ability to compile from object graph ir. 2021-03-20 05:08:04 +08:00			`)`

Make pytorch/backend/refjit.py a bit tidier - Print out initial PyTorch IR. - Rename ambiguous "frontend IR" to "TCF IR". - Add newlines to prints - Rename FRONTEND_PASSES to TORCH_TO_TCF_PASSES 2020-11-21 07:07:34 +08:00			`TORCH_TO_TCF_PASSES = (`
			`"func(aten-recognize-kernels)",`
			`"func(convert-aten-to-tcf)",`
			`"numpy-public-functions-to-tensor",`
			`"canonicalize",`
			`)`
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00
			`# Re-export.`
			`is_enabled = refjit_backend.is_enabled`


Add cos_e2e.py, test_utils and support for tensor inputs (#134) 2020-11-25 11:02:50 +08:00			`class TorchJitModuleInvoker(refjit_backend.JitModuleInvoker):`
			`"""Allows torch.Tensor inputs to be passed to module invocations."""`

			`def __getitem__(self, function_name: str):`
			`numpy_invoke = super().__getitem__(function_name)`

			`def invoke(*args):`
			`args = tuple(`
			`arg.numpy() if isinstance(arg, torch.Tensor) else arg for arg in args)`
			`return numpy_invoke(*args)`

			`return invoke`


Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00			`class CompilerBackend:`
			`"""Main entry-point for the backend."""`

			`def __init__(self):`
			`super().__init__()`
			`self._refjit = refjit_backend.get_refjit()`
			`self._debug = logging.debug_enabled()`

			`def compile(self, imported_module: Module):`
Add ability to compile from object graph ir. 2021-03-20 05:08:04 +08:00			`"""Compiles an imported module, with a flat list of functions.`
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00
			`Args:`
			`imported_module: The MLIR module consisting of funcs in the torch`
			`dialect.`
			`Returns:`
			`An opaque, backend specific module object that can be passed to load.`
			`The object may actually be something more specific to the backend (i.e.`
			`for IREE, it is a serialized VM flatbuffer) but the contract is that`
			`it is operated on by methods on this class.`
			`"""`
Add support for "trailing_" and "out" variants of various ops. We already had the `promoteTrailingOutTensor` flag, but weren't using it. A inplaceVariantKernelName flag needed to be added. This change is a little dissatisfying, as the conversions done by the RecognizeKernelsPass are currently non-orthogonal. In particular, `kDropResultAndAliasArg0` probably won't work as intended if mixed with these (we probably need to promote kDropResultAndAliasArg0 to not be an arg-level thing anyway, as we have done with promoteTrailingOutTensor). This involved adding a new op `numpy.overwrite_array`. ``` numpy.overwrite_array %arg2 overwrites %arg0 : tensor<2x3xf32>, !numpy.ndarray<[2,3]:f32> ``` This models the destructive update behavior. Note that in the above op, we cannot simply RAUW %arg0 with a suitably conveted %arg2 (for example, %arg0 might have uses that are not dominated by %arg2, or might have an alias relation with some other array in the program). In general, we need a pass analogous to "SSA-formation" which knows how to see through these to uncover an underlying tensor program. Also, add tanh_out_e2e.py/div_inplace_e2e.py and fix some bitrot in refjit.py which is my running example I'm trying to get working. 2021-03-19 04:13:40 +08:00			`with imported_module.context as context:`
Make pytorch/backend/refjit.py a bit tidier - Print out initial PyTorch IR. - Rename ambiguous "frontend IR" to "TCF IR". - Add newlines to prints - Rename FRONTEND_PASSES to TORCH_TO_TCF_PASSES 2020-11-21 07:07:34 +08:00			`if self._debug:`
			`logging.debug("Initial PyTorch IR:\n{}", imported_module)`

Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00			`# Frontend.`
Add support for "trailing_" and "out" variants of various ops. We already had the `promoteTrailingOutTensor` flag, but weren't using it. A inplaceVariantKernelName flag needed to be added. This change is a little dissatisfying, as the conversions done by the RecognizeKernelsPass are currently non-orthogonal. In particular, `kDropResultAndAliasArg0` probably won't work as intended if mixed with these (we probably need to promote kDropResultAndAliasArg0 to not be an arg-level thing anyway, as we have done with promoteTrailingOutTensor). This involved adding a new op `numpy.overwrite_array`. ``` numpy.overwrite_array %arg2 overwrites %arg0 : tensor<2x3xf32>, !numpy.ndarray<[2,3]:f32> ``` This models the destructive update behavior. Note that in the above op, we cannot simply RAUW %arg0 with a suitably conveted %arg2 (for example, %arg0 might have uses that are not dominated by %arg2, or might have an alias relation with some other array in the program). In general, we need a pass analogous to "SSA-formation" which knows how to see through these to uncover an underlying tensor program. Also, add tanh_out_e2e.py/div_inplace_e2e.py and fix some bitrot in refjit.py which is my running example I'm trying to get working. 2021-03-19 04:13:40 +08:00			`pipeline_str = ",".join(TORCH_TO_TCF_PASSES)`
			`if self._debug:`
			`logging.debug("Running Torch->TCF pipeline '{}'", pipeline_str)`
			`pm = PassManager.parse(pipeline_str)`
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00			`pm.run(imported_module)`
			`if self._debug:`
Make pytorch/backend/refjit.py a bit tidier - Print out initial PyTorch IR. - Rename ambiguous "frontend IR" to "TCF IR". - Add newlines to prints - Rename FRONTEND_PASSES to TORCH_TO_TCF_PASSES 2020-11-21 07:07:34 +08:00			`logging.debug("TCF IR:\n{}", imported_module)`
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00
			`# Backend.`
			`# Note that this is a separate pass manager purely to aid in debugging.`
			`pm = PassManager()`
			`self._refjit.build_backend_compilation_pipeline(pm)`
			`pm.run(imported_module)`
			`if self._debug:`
Make pytorch/backend/refjit.py a bit tidier - Print out initial PyTorch IR. - Rename ambiguous "frontend IR" to "TCF IR". - Add newlines to prints - Rename FRONTEND_PASSES to TORCH_TO_TCF_PASSES 2020-11-21 07:07:34 +08:00			`logging.debug("Backend IR:\n{}", imported_module)`
Repurpose numpy-compiler compiler/runtime flow for PyTorch. * A bit gross because I took the chance to upgrade all of the backend bits to the new MLIR Python bindings and we still co-mingle the old and new for now. * Since the Python created PassManagers are configured for explicit nesting, I had to upgrade some of the pass pipelines to be explicit. * The demo in mul_maximum_e2e.py now compiles, runs through PyTorch and through the JIT, prints and asserts the same results. * I am not claiming that this is the prettiest API in this patch: consider that this is just directly using low-level APIs and there should be an intervening high level API. 2020-11-11 13:38:13 +08:00
			`jit_module = self._refjit.JITModule.from_compiled_module(`
			`imported_module, refjit_backend.get_runtime_libs())`
			`return jit_module`

Add ability to compile from object graph ir. 2021-03-20 05:08:04 +08:00			`def compile_object_graph(self, imported_module: Module):`
			`"""Compiles an imported module, with TorchScript object graph semantics.`

			`Args:`
			`imported_module: The MLIR module consisting of IR as imported by the`
			`torch_mlir.import_module`
			`Returns:`
			`An opaque, backend specific module object that can be passed to load.`
			`The object may actually be something more specific to the backend (i.e.`
			`for IREE, it is a serialized VM flatbuffer) but the contract is that`
			`it is operated on by methods on this class.`
			`"""`
			`with imported_module.context as context:`
			`if self._debug:`
			`logging.debug("Initial PyTorch object graph IR:\n{}", imported_module)`

			`# Frontend.`
			`pipeline_str = ",".join(OBJECT_GRAPH_LOWERING_PASSES)`
			`if self._debug:`
			`logging.debug(`
			`"Running Torch object graph lowering pipeline '{}'", pipeline_str)`
			`pm = PassManager.parse(pipeline_str)`
			`pm.run(imported_module)`
			`return self.compile(imported_module)`

Add cos_e2e.py, test_utils and support for tensor inputs (#134) 2020-11-25 11:02:50 +08:00			`def load(self, jit_module) -> TorchJitModuleInvoker:`
			`"""Loads a compiled artifact into the runtime."""`
			`return TorchJitModuleInvoker(jit_module)`