mirror of https://github.com/llvm/torch-mlir

[custom op] Generalize shape library logic to work with dtypes (#1594)

This commit generalizes the shape library logic so that dtype rules for ops can also be expressed using the same mechanism. In other words, each op can now have a shape function and a dtype function specified in Python that is imported during lowering to calculate the shapes and dtypes throughout a program. For more information about how to specify a dtype function, see the updated `docs/adding_a_shape_and_dtype_function.md`. For those not familiar with how the library works, the file `docs/abstract_interp_lib.md` provides an overview.

parent 2acf7da63c
commit a710237437
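Before the diff itself, here is a minimal sketch (not the library's actual source) of the paired shape and dtype rules this mechanism now supports. The ASCII function names stand in for the `〇`/`〡`-separated spellings used in the real library, and the ScalarType codes match the generated IR later in this diff (7 = float64, 15 = bfloat16, 6 = float32).

```python
from typing import List

def aten_tanh_shape(self: List[int]) -> List[int]:
    # Shape rule: tanh is elementwise, so the result shape is the input shape.
    return self

def aten_tanh_dtype(self_rank: int, self_dtype: int) -> int:
    # Dtype rule: dtypes travel as integer ScalarType codes. tanh keeps
    # float64/bfloat16 inputs and produces float32 for everything else,
    # mirroring the generated rule for aten.tanh later in this diff.
    if self_dtype == 7 or self_dtype == 15:  # float64 or bfloat16
        return self_dtype
    return 6  # float32
```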
@@ -61,14 +61,14 @@ jobs:
echo "PT_RELEASE=${PT_RELEASE}" >> ${GITHUB_ENV}
echo "PT_HASH_CHANGED=${PT_HASH_CHANGED}" >> ${GITHUB_ENV}

- name: Build and test (in-tree), also update ODS and shape library
- name: Build and test (in-tree), also update ODS and abstract interpretation library
if: env.PT_HASH_CHANGED != '0'
run: |
cd ${GITHUB_WORKSPACE}
TM_PACKAGES="in-tree" TM_USE_PYTORCH_BINARY="OFF" \
TORCH_MLIR_SRC_PYTORCH_BRANCH="${{ env.PT_HASH }}" \
TORCH_MLIR_SRC_PYTORCH_RELEASE="${{ env.PT_RELEASE }}" \
TM_UPDATE_ODS_AND_SHAPE_LIB="ON" \
TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB="ON" \
./build_tools/python_deploy/build_linux_packages.sh

- name: Push changes to main branch
@@ -79,7 +79,7 @@ jobs:
git config user.name "Roll PyTorch Action"
git fetch --recurse-submodules=no
git checkout main
git add pytorch-hash.txt pytorch-requirements.txt lib/Dialect/Torch/Transforms/ShapeLibrary.cpp include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
git add pytorch-hash.txt pytorch-requirements.txt lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
git diff --cached --exit-code || (git commit -m "update PyTorch version to ${{ env.PT_RELEASE }}" && git push --set-upstream origin main)

- name: Update PyTorch Build Cache (if running on main branch)
@@ -53,8 +53,8 @@ TM_PACKAGES="${TM_PACKAGES:-torch-mlir}"
TM_USE_PYTORCH_BINARY="${TM_USE_PYTORCH_BINARY:-ON}"
# Skip running tests if you want quick iteration
TM_SKIP_TESTS="${TM_SKIP_TESTS:-OFF}"
# Update ODS and shape library files
TM_UPDATE_ODS_AND_SHAPE_LIB="${TM_UPDATE_ODS_AND_SHAPE_LIB:-OFF}"
# Update ODS and abstract interpretation library files
TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB="${TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB:-OFF}"

PKG_VER_FILE="${repo_root}"/torch_mlir_package_version ; [ -f "$PKG_VER_FILE" ] && . "$PKG_VER_FILE"
TORCH_MLIR_PYTHON_PACKAGE_VERSION="${TORCH_MLIR_PYTHON_PACKAGE_VERSION:-0.0.1}"
@@ -119,7 +119,7 @@ function run_on_host() {
-e "TM_PYTHON_VERSIONS=${TM_PYTHON_VERSIONS}" \
-e "TM_PACKAGES=${package}" \
-e "TM_SKIP_TESTS=${TM_SKIP_TESTS}" \
-e "TM_UPDATE_ODS_AND_SHAPE_LIB=${TM_UPDATE_ODS_AND_SHAPE_LIB}" \
-e "TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB=${TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB}" \
-e "TM_USE_PYTORCH_BINARY=${TM_USE_PYTORCH_BINARY}" \
-e "TORCH_MLIR_SRC_PYTORCH_REPO=${TORCH_MLIR_SRC_PYTORCH_REPO}" \
-e "TORCH_MLIR_SRC_PYTORCH_BRANCH=${TORCH_MLIR_SRC_PYTORCH_BRANCH}" \
@@ -164,10 +164,10 @@ function run_in_docker() {
in-tree)
setup_venv "$python_version"
build_in_tree "$TM_USE_PYTORCH_BINARY" "$python_version"
if [ "${TM_UPDATE_ODS_AND_SHAPE_LIB}" == "ON" ]; then
if [ "${TM_UPDATE_ODS_AND_ABSTRACT_INTERP_LIB}" == "ON" ]; then
pushd /main_checkout/torch-mlir
./build_tools/update_torch_ods.sh
./build_tools/update_shape_lib.sh
./build_tools/update_abstract_interp_lib.sh
popd
fi
if [ "${TM_SKIP_TESTS}" == "OFF" ]; then
@@ -253,8 +253,8 @@ function test_in_tree() {
cd /main_checkout/torch-mlir/
export PYTHONPATH="/main_checkout/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir"

echo ":::: Check that update_shape_lib.sh has been run"
_check_file_not_changed_by ./build_tools/update_shape_lib.sh lib/Dialect/Torch/Transforms/ShapeLibrary.cpp
echo ":::: Check that update_abstract_interp_lib.sh has been run"
_check_file_not_changed_by ./build_tools/update_abstract_interp_lib.sh lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp

echo ":::: Check that update_torch_ods.sh has been run"
_check_file_not_changed_by ./build_tools/update_torch_ods.sh include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@@ -1,5 +1,6 @@
#!/bin/bash
# Updates auto-generated shape library files for the `torch` dialect.
# Updates auto-generated abstract interpretation library files for the
# `torch` dialect.
#
# Environment variables:
#   TORCH_MLIR_EXT_MODULES: comma-separated list of python module names
@@ -41,6 +42,6 @@ if [ ! -z ${TORCH_MLIR_EXT_MODULES} ]; then
fi

PYTHONPATH="${pypath}" python \
-m torch_mlir.dialects.torch.importer.jit_ir.build_tools.shape_lib_gen \
-m torch_mlir.dialects.torch.importer.jit_ir.build_tools.abstract_interp_lib_gen \
--pytorch_op_extensions=${ext_module:-""} \
--torch_transforms_cpp_dir="${torch_transforms_cpp_dir}"
@@ -0,0 +1,130 @@
# Torch-MLIR Abstract Interpretation Library Infrastructure

## Overview

The Torch-MLIR project has an infrastructure for maintaining a library of calculation functions for different Torch operators, which supply extra information such as result dtypes and shapes as well as decompositions. These functions are fully executable specifications of the shape/dtype/decomposition functions for each operator and can be authored and tested from Python for convenience. These are then brought into the compiler and can be manipulated / transformed for various purposes. Additionally, in the case of shape functions, this effort is synergistic with upstream PyTorch efforts to maintain a library of shape functions.

The two main use cases are:

- Refinement / inference. The `torch-shape-refinement-pipeline` and `torch-dtype-refinement-pipeline` pass pipelines orchestrate a series of passes that use the available information in the program to further refine the types in the program.

- Error guard insertion for backends (Not Yet Implemented). The executable functions can include error guards / assertions that abort the program in case of invalid input (such as a matmul with a mismatching contracting dimension).

## Architecture

Functions are defined as TorchScript-able Python functions in `python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/abstract_interp_lib_gen.py`. The signatures of the functions are systematically derived from the Torch JIT operator registry. Most shape functions are expected to reuse the upstream helper functions in [`torch/jit/_shape_functions.py`](https://github.com/pytorch/pytorch/blob/279634f384662b7c3a9f8bf7ccc3a6afd2f05657/torch/jit/_shape_functions.py#L1), and any new shape functions should be added there.

The `build_tools/update_abstract_interp_lib.sh` script invokes `abstract_interp_lib_gen.py` to generate an MLIR module containing the functions, which is currently embedded as a string literal in `lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp`.

The function `StringRef mlir::torch::Torch::getAbstractInterpLibrary()` is available for use inside the compiler any time that the library is needed.
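As a concrete illustration of the reuse described above, here is a hedged sketch of shape functions that delegate to the upstream helpers; `unary` and `broadcast` are the two helpers that actually appear in the generated library later in this diff.

```python
from typing import List

# Upstream helpers from PyTorch; these are the ones referenced by the
# generated library in this commit (e.g. for aten.erf and aten.atan2).
from torch.jit._shape_functions import broadcast, unary

def aten_erf_shape(self: List[int]) -> List[int]:
    # Elementwise op: the result shape equals the input shape.
    return unary(self)

def aten_atan2_shape(self: List[int], other: List[int]) -> List[int]:
    # Binary op: standard broadcasting rule.
    return broadcast(self, other)

print(aten_atan2_shape([2, 1, 4], [3, 1]))  # [2, 3, 4]
```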
## Shape and Dtype Refinement Pipeline Architecture

One of the main services that Torch-MLIR provides for backends is to normalize all Torch frontends into a common form which includes tensor shapes and dtypes that are as precise as possible. This alleviates the need for backends to solve this problem themselves. This process of shape and dtype refinement is accomplished in Torch-MLIR through a pipeline of passes which uses the abstract interpretation library combined with abstract interpretation of the calculation functions to calculate shapes and dtypes that are as precise as possible.

The pipeline works as follows:

1. Calculations are reified. The `torch-reify-shape-calculations` and `torch-reify-dtype-calculations` passes reify (i.e., materialize into the IR) the functions for each op with a function in the calculation library. To do this, the passes wrap those ops in a `torch.shape.calculate` or `torch.dtype.calculate` op, respectively, which has two regions: 1) a body with the op itself, and 2) the shape or dtype calculation, which calculates the shapes or dtypes of the tensors yielded by the body.

2. Simplifying the functions and propagating the shapes and dtypes. After the functions are reified, we then attempt to "optimize hard enough" until the shapes and dtypes yielded by the calculation regions become obvious in the IR. Those results are propagated through the IR, which usually reveals more opportunities for simplification.

   a. After reification, the functions are just a loose collection of functions, which are difficult to analyze. The first step is to inline them.

   b. After inlining, the `torch-simplify-shape-calculations` and `torch-simplify-dtype-calculations` passes are used to simplify the calculations. These passes bring in a number of targeted canonicalization patterns and folds, along with a few specific patterns such as unrolling fixed-trip-count loops and abstractly interpreting list operations (an example is turning a series of "append" operations into a list literal). These passes also look at the values yielded by the calculation regions, and if the resulting shape or dtype can be deduced by looking at the IR (for example, the shape is the list literal `[1, 2, 3]`), they will refine the types of the `torch.shape.calculate` and `torch.dtype.calculate` ops. This usually yields more opportunities for simplification. This process runs to a fixed-point.

3. Dropping the calculations. Once all the types in the program have been refined as much as possible, the ops that were originally wrapped in `torch.shape.calculate` and `torch.dtype.calculate` are unwrapped by the `torch-drop-abstract-interp-calculations` pass, which drops the reified calculations, leaving behind the shape- and dtype-refined program.

Inferring precise shapes and dtypes is often needed for correctness by backends. That said, requiring "optimizing hard enough" for correctness is usually considered quite brittle in a compiler flow. In this case, the saving grace is that we are only optimizing the functions, which are authored by compiler developers (not users), and thus there is some give-and-take in terms of understanding the optimizable constructs while authoring the functions, or improving the optimizations to enable easier authoring. Some brittleness is likely to escape to users, unfortunately, since there will always be situations where, for example, a statically shaped program allows the shape functions to be simplified to a greater extent than in a dynamically shaped program (for example, if the shape function checks "is this dimension of size 1"). We hope that this is minimal.
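To make that last point concrete, here is a hedged Python sketch (not the library's actual code) of the kind of "is this dimension of size 1" branch the text refers to. With static input shapes every `== 1` test folds to a constant and the comprehension unrolls to a list literal; with dynamic dimensions the branches survive and the yielded shape stays partially unknown.

```python
from typing import List

def broadcast_one_dim(a: int, b: int) -> int:
    # The data-dependent branch that folds away when `a` and `b` are
    # compile-time constants but survives when they are dynamic.
    if a == 1:
        return b
    if b == 1:
        return a
    assert a == b, "non-broadcastable dimensions"
    return a

def broadcast_shapes(a: List[int], b: List[int]) -> List[int]:
    # Same-rank broadcasting only, to keep the sketch short.
    return [broadcast_one_dim(x, y) for x, y in zip(a, b)]

print(broadcast_shapes([2, 1, 4], [2, 3, 4]))  # [2, 3, 4]
```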
## Adding to the abstract interpretation library

See [Adding a Shape and Dtype Function](adding_a_shape_and_dtype_function.md) for details on how to add a shape and dtype function for an operator.

## Rationale

### Use of full operator signatures

The use of the full operator signature such as `def aten〇add〇Tensor(self: List[int], other: List[int], alpha: float = 1) -> List[int]:` for defining calculation functions is somewhat verbose and repetitive, especially when there are multiple identical functions. Upstream uses a map with key-value pairs like `"aten.add.Tensor": upstream_shape_functions.broadcast`, which is more compact and less repetitive in some ways (upstream also allows trailing arguments beyond those accepted by the shape function to be ignored, allowing further deduplication); both styles are sketched after the list below. The decision to do it the more verbose way in Torch-MLIR was based on the following goals:

- To make the system very easy to debug and test.

- To make the system maximally consistent between functions that are implemented with the upstream shape helpers and the ones that are manually written, which are still a fairly large and non-trivial set.

- To make it as mechanical as possible to add a new function.
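Here are the two styles side by side, as a hedged sketch; the ASCII function name stands in for the real `aten〇add〇Tensor` spelling, and the upstream-style mapping is illustrative rather than a copy of PyTorch's actual registration table.

```python
from typing import List

from torch.jit import _shape_functions as upstream_shape_functions

# Torch-MLIR style: one function per operator, with the full signature
# spelled out (alpha is accepted so the signature matches the op, even
# though the shape does not depend on it).
def aten_add_Tensor(self: List[int], other: List[int],
                    alpha: float = 1) -> List[int]:
    return upstream_shape_functions.broadcast(self, other)

# Upstream style: a compact map from op name to a reusable helper; trailing
# arguments the helper does not accept are simply ignored.
shape_function_map = {
    "aten.add.Tensor": upstream_shape_functions.broadcast,
}
```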
@@ -1,75 +0,0 @@
# Adding a Shape Function

## Overview

As part of adding support for a Torch operator in Torch-MLIR, it is usually necessary to define a shape function so that the compiler can infer the shapes of result tensors for the operator. We use the [shape library](shape_lib.md) for this process.

## Step-by-step guide

We will use the example of adding support for the `torch.aten.tanh` op.

1. First, you need to find the shape function signature for the operator you are implementing a shape function for. This can be found in `include/torch-mlir/Dialect/Torch/IR/JITOperatorRegistryDump.txt`, generated by the `build_tools/update_torch_ods.sh` script. That file is the "rosetta stone" that allows translating between e.g. `torch.aten.tanh`, `AtenTanhOp`, and the shape function signature `def aten〇tanh(self: List[int]) -> List[int]:`. Note the use of `〇` as a separator since `.` or `::` aren't legal in a Python identifier.

2. Paste the shape function signature into `shape_lib_gen.py` in an appropriate place (ideally near other functions with a similar shape function). Note that `shape_lib_gen.py` will check that this signature is verbatim identical with the one given in `JITOperatorRegistryDump.txt` -- this ensures that the shape functions don't get outdated if Torch changes an operator signature.

3. Fill in the body of the shape function. Ideally this will just be a call into a helper function from [`torch/jit/_shape_functions.py`](https://github.com/pytorch/pytorch/blob/279634f384662b7c3a9f8bf7ccc3a6afd2f05657/torch/jit/_shape_functions.py#L1). But in general, you will need to write the shape function and test it (see the comments about "Shape function testing infrastructure" in `shape_lib_gen.py`). New shape functions should be added upstream following the example of [this PR](https://github.com/pytorch/pytorch/pull/76889), though it can be useful to iterate locally in `shape_lib_gen.py` first.

4. Re-run the `build_tools/update_shape_lib.sh` script to update the shape library. After this step happens, ideally everything "just works" and the shape is now correctly inferred for the operator.

## When things go wrong

It is possible that the shape refinement pipeline (see [Shape Refinement Pipeline Architecture](shape_lib.md#shape-refinement-pipeline-architecture)) is not able to infer the shape of a tensor with a given shape function. This usually means that there is something about the shape function which the optimizations in `torch-simplify-shape-functions` (`lib/Dialect/Torch/Transforms/SimplifyShapeCalculations.cpp`) cannot handle.

To debug this, the overall goal is to pinpoint the IR construct that is not being simplified. This is usually accomplished by a combination of looking at the Python code for the shape function and the IR dumps. The best IR dump to look at varies, but frequently the IR dump right before `DropShapeCalculations` is the most useful, because it has already been simplified as much as possible, making it easy to see what is blocking further simplification. Examples of issues you might see:

- You might find that there is a loop with a non-constant trip count, but based on your understanding of the shape function, you would expect it to be simplified to a constant trip count -- you can then look at the trip count calculation and see if there is a missing fold or canonicalization.

- You might find that there is a list operation that is not currently understood by the optimizations. You can then teach the optimizations about that operation.

- You might find that there is an `Optional` value that you would expect to be resolved to either a concrete value or `None`. You can then look at the calculation that produces the optional value and see what folds or canonicalizations are missing.

See [this video](https://www.youtube.com/watch?v=E5epCJOtrf8) for general guidance on debugging Torch-MLIR.

As a last resort, you can rewrite the shape function using constructs that `torch-simplify-shape-functions` can handle (look at other shape functions for examples, sometimes it requires writing things a little awkwardly).
@@ -0,0 +1,99 @@
# Adding Abstract Interpretation Functions

## Overview

As part of adding support for a Torch operator in Torch-MLIR, it is usually necessary to define a shape and dtype function so that the compiler can infer the shapes and dtypes of result tensors for the operator. We use the [abstract interpretation library](abstract_interp_lib.md) for this process.

## Step-by-step guide

We will use the example of adding support for the `torch.aten.tanh` op. A sketch of the resulting dtype-function style appears after this list.

1. First, you need to find the shape and dtype function signatures for the operator you are implementing functions for. These can be found in `include/torch-mlir/Dialect/Torch/IR/JITOperatorRegistryDump.txt`, generated by the `build_tools/update_torch_ods.sh` script. That file is the "rosetta stone" that allows translating between e.g. `torch.aten.tanh`, `AtenTanhOp`, and the shape and dtype function signatures:

   - `def aten〇tanh〡shape(self: List[int]) -> List[int]:`
   - `def aten〇tanh〡dtype(self_rank: int, self_dtype: int) -> int:`

   Note the use of `〇` as a separator since `.` or `::` aren't legal in a Python identifier.

2. Paste the function signatures into `abstract_interp_lib_gen.py` in an appropriate place (ideally near other similar functions). Note that `abstract_interp_lib_gen.py` will check that these signatures are verbatim identical with the ones given in `JITOperatorRegistryDump.txt` -- this ensures that the functions don't get outdated if Torch changes an operator signature.

3. Fill in the body of the function. Ideally this will just be a call into a helper function from [`torch/jit/_shape_functions.py`](https://github.com/pytorch/pytorch/blob/279634f384662b7c3a9f8bf7ccc3a6afd2f05657/torch/jit/_shape_functions.py#L1). But in general, you will need to write the function and test it (see the comments about "Shape, dtype, and decomposition function testing infrastructure" in `testing_framework.py`). New shape functions should be added upstream following the example of [this PR](https://github.com/pytorch/pytorch/pull/76889), though it can be useful to iterate locally in `abstract_interp_lib_gen.py` first.

   Similarly, dtype functions should ideally just be a call to the helper `promote_dtypes` defined in `library_generator.py`. However, some ops will require some extra logic to calculate the right result types. While dtypes are expressed as `int`s in the arguments of the dtype function, using PyTorch dtypes, such as `torch.int` and `torch.float32`, in the body of the dtype function is fully supported. Dtype functions are also expected to be fully tested.

4. Re-run the `build_tools/update_abstract_interp_lib.sh` script to update the library. After this step happens, ideally everything "just works" and the functions are now correctly inferred for the operator.
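Here is a hedged sketch of the kind of dtype function step 3 describes, using the `promote_dtypes` and `get_dtype_of_scalar` helpers from `library_generator.py` and mirroring the generated rule for `aten.rsub.Scalar` elsewhere in this diff. The ASCII name stands in for the real `〇`/`〡` spelling, and the import assumes the in-tree `torch_mlir` Python package is on `PYTHONPATH`.

```python
from typing import List, Optional, Union

from torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator import (
    get_dtype_of_scalar, promote_dtypes)

def aten_rsub_Scalar_dtype(self_rank: int, self_dtype: int,
                           other: Union[int, float],
                           alpha: Union[int, float] = 1) -> int:
    # The tensor operand contributes its rank and dtype; the Scalar operand
    # contributes a None rank (Scalars follow their own promotion rules) and
    # the dtype of its Python value. promote_dtypes then applies PyTorch's
    # promotion logic to the pair; `alpha` does not affect the result dtype.
    ranks: List[Optional[int]] = [self_rank, None]
    dtypes = [self_dtype, get_dtype_of_scalar(other)]
    return promote_dtypes(ranks, dtypes)
```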
## When things go wrong

It is possible that the refinement pipeline (see [Shape and Dtype Refinement Pipeline Architecture](abstract_interp_lib.md#shape-and-dtype-refinement-pipeline-architecture)) is not able to infer the shape or dtype of a tensor with a given abstract interpretation function. This usually means that there is something about the function which the optimizations in `torch-simplify-shape-functions` and `torch-simplify-dtype-functions` (`lib/Dialect/Torch/Transforms/SimplifyShapeCalculations.cpp`, `lib/Dialect/Torch/Transforms/SimplifyDtypeCalculations.cpp`) cannot handle.

To debug this, the overall goal is to pinpoint the IR construct that is not being simplified. This is usually accomplished by a combination of looking at the Python code for the function and the IR dumps. The best IR dump to look at varies, but frequently the IR dump right before `DropAbstractInterpCalculations` is the most useful, because it has already been simplified as much as possible, making it easy to see what is blocking further simplification. Examples of issues you might see:

- You might find that there is a loop with a non-constant trip count, but based on your understanding of the function, you would expect it to be simplified to a constant trip count -- you can then look at the trip count calculation and see if there is a missing fold or canonicalization.

- You might find that there is a list operation that is not currently understood by the optimizations. You can then teach the optimizations about that operation.

- You might find that there is an `Optional` value that you would expect to be resolved to either a concrete value or `None`. You can then look at the calculation that produces the optional value and see what folds or canonicalizations are missing.

See [this video](https://www.youtube.com/watch?v=E5epCJOtrf8) for general guidance on debugging Torch-MLIR.

As a last resort, you can rewrite the function using constructs that `torch-simplify-shape-functions` and `torch-simplify-dtype-functions` can handle (look at other functions for examples, sometimes it requires writing things a little awkwardly).
@@ -446,10 +446,12 @@ LLVM updates in other projects like ONNX-MLIR and MLIR-HLO.
   working on). If these fixes are too complex, please file a work-in-progress
   PR explaining the issues you are running into asking for help so that someone
   from the community can help.
5. **Update Shape Library**: Run `build_tools/update_shape_lib.sh`. This is
   sometimes needed because upstream changes can affect canonicalization and
   other minor details of the IR in the shape library. See
   [docs/shape_lib.md](docs/shape_lib.md) for more details on the shape library.
5. **Update Abstract Interpretation Library**: Run
   `build_tools/update_abstract_interp_lib.sh`. This is sometimes needed
   because upstream changes can affect canonicalization and other minor details
   of the IR in the shape library. See
   [docs/abstract_interp_lib.md](docs/abstract_interp_lib.md) for more details
   on the abstract interpretation library.

Here are some examples of PRs updating the LLVM and MLIR-HLO submodules:
@@ -1,121 +0,0 @@
# Torch-MLIR Shape Library Infrastructure

## Overview

The Torch-MLIR project has an infrastructure for maintaining a library of shape functions for different Torch operators. These shape functions are fully executable specifications of the shape functions for each operator and can be authored and tested from Python for convenience. These are then brought into the compiler and can be manipulated / transformed for various purposes. Additionally, this effort is synergistic with upstream PyTorch efforts to maintain a library of shape functions.

The two main use cases are:

- Shape refinement / shape inference. The `torch-shape-refinement-pipeline` pass pipeline orchestrates a series of passes that use the available shape information in the program to further refine the types in the program.

- Error guard insertion for backends (Not Yet Implemented). The executable shape functions can include error guards / assertions that abort the program in case of invalid input (such as a matmul with a mismatching contracting dimension).

## Architecture

Shape functions are defined as TorchScript-able Python functions in `python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/shape_lib_gen.py`. The signatures of the shape functions are systematically derived from the Torch JIT operator registry (mainly by replacing `Tensor` with `List[int]` in the operator signatures). Most shape functions are expected to reuse the upstream helper functions in [`torch/jit/_shape_functions.py`](https://github.com/pytorch/pytorch/blob/279634f384662b7c3a9f8bf7ccc3a6afd2f05657/torch/jit/_shape_functions.py#L1), and any new shape functions should be added there.

The `build_tools/update_shape_lib.sh` script invokes `shape_lib_gen.py` to generate an MLIR module containing the shape functions, which is currently embedded as a string literal in `lib/Dialect/Torch/Transforms/ShapeLibrary.cpp`.

The function `StringRef mlir::torch::Torch::getShapeLibrary()` is available for use inside the compiler any time that the shape library is needed.

## Shape Refinement Pipeline Architecture

One of the main services that Torch-MLIR provides for backends is to normalize all Torch frontends into a common form which includes tensor shapes that are as precise as possible. This alleviates the need for backends to solve this problem themselves. This process of shape refinement is accomplished in Torch-MLIR through a pipeline of passes which uses the shape library combined with abstract interpretation of the shape functions to calculate shapes that are as precise as possible.

The pipeline works as follows:

1. Shape calculations are reified. The `torch-reify-shape-calculations` pass reifies (i.e., materializes into the IR) the shape functions for each op with a shape function in the shape library. To do this, it wraps those ops in a `torch.shape.calculate` op, which has two regions: 1) a body with the op itself, and 2) the shape calculation, which calculates the shapes of the tensors yielded by the body.

2. Simplifying the shape functions and propagating the shapes. After the shape functions are reified, we then attempt to "optimize hard enough" until the shapes yielded by the shape calculation regions become obvious in the IR. Those shapes are propagated through the IR, which usually reveals more opportunities for simplification.

   a. After reification, the shape functions are just a loose collection of functions, which are difficult to analyze. The first step is to inline them.

   b. After inlining, the `torch-simplify-shape-calculations` pass is used to simplify the shape calculations. This pass brings in a number of targeted canonicalization patterns and folds, along with a few specific patterns such as unrolling fixed-trip-count loops and abstractly interpreting list operations (an example is turning a series of "append" operations into a list literal). This pass also looks at the values yielded by the shape calculation regions, and if the resulting shape can be deduced by looking at the IR (for example, the shape is the list literal `[1, 2, 3]`), it will refine the types of the `torch.shape.calculate` op. This usually yields more opportunities for simplification. This process runs to a fixed-point.

3. Dropping the shape calculations. Once all the types in the program have been refined as much as possible, the ops that were originally wrapped in `torch.shape.calculate` are unwrapped by the `torch-drop-shape-calculations` pass which drops the reified shape calculations, leaving behind the shape-refined program.

Inferring precise shapes is often needed for correctness by backends. That said, requiring "optimizing hard enough" for correctness is usually considered quite brittle in a compiler flow. In this case, the saving grace is that we are only optimizing the shape functions, which are authored by compiler developers (not users), and thus there is some give-and-take in terms of understanding the optimizable constructs while authoring the shape functions, or improving the optimizations to enable easier authoring. Some brittleness is likely to escape to users, unfortunately, since there will always be situations where, for example, a statically shaped program allows the shape functions to be simplified to a greater extent than in a dynamically shaped program (for example, if the shape function checks "is this dimension of size 1"). We hope that this is minimal.

## Adding to the shape library

See [Adding a Shape Function](adding_a_shape_function.md) for details on how to add a shape function for an operator.

## Rationale

### Use of full operator signatures

The use of the full operator signature such as `def aten〇add〇Tensor(self: List[int], other: List[int], alpha: float = 1) -> List[int]:` for defining shape functions is somewhat verbose and repetitive, especially when there are multiple identical shape functions. Upstream uses a map with key-value pairs like `"aten.add.Tensor": upstream_shape_functions.broadcast`, which is more compact and less repetitive in some ways (upstream also allows trailing arguments beyond those accepted by the shape function to be ignored, allowing further deduplication). The decision to do it the more verbose way in Torch-MLIR was based on the following goals:

- To make the system very easy to debug and test.

- To make the system maximally consistent between shape functions that are implemented with the upstream shape helpers and the ones that are manually written, which are still a fairly large and non-trivial set.

- To make it as mechanical as possible to add a new shape function.
@@ -171,6 +171,42 @@ m_TorchListOfConstantInts(SmallVectorImpl<int64_t> &bind_values) {
  return detail::torch_list_of_constant_ints_op_binder(bind_values);
}

namespace detail {
/// Matches the optional constant integers stored in a `torch.ListConstruct`.
struct torch_list_of_optional_constant_ints_op_binder {
  SmallVectorImpl<Optional<int64_t>> &bind_values;

  /// Creates a matcher instance that binds the value to bvs if match succeeds.
  torch_list_of_optional_constant_ints_op_binder(
      SmallVectorImpl<Optional<int64_t>> &bvs)
      : bind_values(bvs) {}

  bool match(Operation *op) {
    auto listConstruct = dyn_cast<Torch::PrimListConstructOp>(op);
    if (!listConstruct)
      return false;
    for (Value value : listConstruct.getElements()) {
      int64_t num;
      if (matchPattern(value, m_TorchConstantInt(&num)))
        bind_values.push_back(num);
      else if (value.getType().isa<Torch::NoneType>())
        bind_values.push_back(llvm::None);
      else
        return false;
    }
    return true;
  }
};
} // namespace detail

/// Matches the optional constant integers stored in a
/// `torch.prim.ListConstruct`.
inline detail::torch_list_of_optional_constant_ints_op_binder
m_TorchListOfOptionalConstantInts(
    SmallVectorImpl<Optional<int64_t>> &bind_values) {
  return detail::torch_list_of_optional_constant_ints_op_binder(bind_values);
}

namespace detail {
/// Matches the constant bools stored in a `torch.ListConstruct`.
struct torch_list_of_constant_bools_op_binder {
@@ -1101,6 +1101,40 @@ def Torch_ValsemVariantAtenBernoulliFloatOp: Torch_Op<"valsem.aten.bernoulli.flo
  let assemblyFormat = "$self `,` $p `,` $generator attr-dict `:` type($self) `,` type($p) `,` type($generator) `->` type($result)";
}

def Torch_PromoteDtypesOp: Torch_Op<"promote_dtypes", [
    AllowsTypeRefinement,
    HasValueSemantics,
    ReadOnly
  ]> {
  let summary = "`promote_dtypes op : (int?[], int[]) -> (int)`";
  let description = [{
    This op is generated when the python function
    `__torch_mlir_internal_promote_dtypes` is used in a dtype refinement
    function. It represents the type promotion logic used by PyTorch to
    determine result types.

    The first argument is a list of optional ranks for each of the inputs
    being used for promotion. The ranks are optional to allow representing
    `Scalar` inputs, which follow their own set of promotion rules.

    The second argument is a list of dtypes for each of the inputs being used
    for promotion.

    The order of the values in each list must be the same. In other words,
    the ith rank and the ith dtype must be from the same Scalar/Tensor.

    It is an error to call this op with empty lists or lists of different size.
  }];
  let arguments = (ins
    AnyTorchListOfOptionalIntType:$ranks,
    AnyTorchListOfTorchIntType:$dtypes
  );
  let results = (outs
    Torch_IntType:$result
  );
  let assemblyFormat = "$ranks `,` $dtypes attr-dict `:` functional-type(operands, results)";
}
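The Scalar-vs-Tensor distinction that motivates the optional ranks can be seen in stock PyTorch; this is a hedged illustration of the promotion behavior the op models, not compiler code.

```python
import torch

t = torch.zeros(2, 2, dtype=torch.int64)

# A Python float Scalar promotes an integer tensor only up to the default
# float dtype, so the result is float32.
print(torch.result_type(t, 2.0))  # torch.float32

# A float64 *tensor* operand keeps its dtype in promotion, so the result is
# float64. This is why each dtype is paired with an optional rank: a None
# rank marks a Scalar, which follows the weaker promotion rule above.
print(torch.result_type(t, torch.tensor(2.0, dtype=torch.float64)))  # torch.float64
```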

// To handle runtime assertions, torchscript provides us `torch._assert` operation.
// But TS compiler introduces control flow for `torch._assert` operation. The
// `torch._assert` would introduce control flow like:
@@ -1140,31 +1174,31 @@ def Torch_ShapeCalculateOp : Torch_Op<"shape.calculate", [
  let summary = "Shape calculation encapsulation op";
  let description = [{
    The `torch.shape.calculate` op captures a shape calculation
    (in the region `shapeCalculation`) which calculates the shapes for
    (in the region `calculation`) which calculates the shapes for
    the set of values yielded by the `body` region.

    The `shapeCalculation` region yields a `!torch.list<int>` for each
    The `calculation` region yields a `!torch.list<int>` for each
    value yielded by the `body` region.

    Conceptually, the `shapeCalculation` region executes first, then `body`
    region. So the `shapeCalculation` region can also contain arbitrary
    Conceptually, the `calculation` region executes first, then `body`
    region. So the `calculation` region can also contain arbitrary
    assertions or side-effecting code which guard the validity of the execution
    of the body (typically by terminating the program with a
    torch.prim.RaiseException op).

    The program has undefined behavior if the values yielded by the `body`
    region do not have the shapes yielded by the `shapeCalculation` region.
    region do not have the shapes yielded by the `calculation` region.
  }];

  let arguments = (ins);
  let results = (outs Variadic<AnyTorchType>:$results);
  let regions = (region
    SizedRegion<1>:$body,
    SizedRegion<1>:$shapeCalculation
    SizedRegion<1>:$calculation
  );

  let assemblyFormat = [{
    $body `shapes` $shapeCalculation attr-dict `:` type($results)
    $body `shapes` $calculation attr-dict `:` type($results)
  }];
}
@@ -1211,4 +1245,84 @@ def Torch_ShapeCalculateYieldShapesOp : Torch_Op<"shape.calculate.yield.shapes",
  let hasVerifier = 1;
}

//===----------------------------------------------------------------------===//
// Dtype calculation modeling ops.
//===----------------------------------------------------------------------===//

def Torch_DtypeCalculateOp : Torch_Op<"dtype.calculate", [
    DeclareOpInterfaceMethods<RegionBranchOpInterface>]> {
  let summary = "Dtype calculation encapsulation op";
  let description = [{
    The `torch.dtype.calculate` op captures a dtype calculation
    (in the region `calculation`) which calculates the dtypes for
    the set of values yielded by the `body` region.

    The `calculation` region yields a `!torch.int` for each
    value yielded by the `body` region.

    Conceptually, the `calculation` region executes first, then `body`
    region. So the `calculation` region can also contain arbitrary
    assertions or side-effecting code which guard the validity of the execution
    of the body (typically by terminating the program with a
    torch.prim.RaiseException op).

    The program has undefined behavior if the values yielded by the `body`
    region do not have the dtypes yielded by the `calculation` region.
  }];

  let arguments = (ins);
  let results = (outs Variadic<AnyTorchType>:$results);
  let regions = (region
    SizedRegion<1>:$body,
    SizedRegion<1>:$calculation
  );

  let assemblyFormat = [{
    $body `dtypes` $calculation attr-dict `:` type($results)
  }];
}

def Torch_DtypeCalculateYieldOp : Torch_Op<"dtype.calculate.yield", [
    Terminator,
    ReturnLike,
    HasParent<"::mlir::torch::Torch::DtypeCalculateOp">]> {
  let summary = "yield-like terminator for torch.dtype.calculate";
  let description = [{
    This op terminates the `body` region of a `torch.dtype.calculate` op.
  }];

  let arguments = (ins
    Variadic<AnyTorchType>:$results
  );
  let results = (outs);

  let assemblyFormat = [{
    attr-dict ($results^ `:` type($results))?
  }];
}

def Torch_DtypeCalculateYieldDtypesOp : Torch_Op<"dtype.calculate.yield.dtypes", [
    DeclareOpInterfaceMethods<RegionBranchTerminatorOpInterface>,
    Terminator,
    HasValueSemantics,
    ReadOnly,
    HasParent<"::mlir::torch::Torch::DtypeCalculateOp">]> {
  let summary = "yield-like terminator for torch.dtype.calculate shape region";
  let description = [{
    This op terminates the `dtypeCalculation` region of a
    `torch.dtype.calculate` op.
  }];

  let arguments = (ins
    Variadic<Torch_IntType>:$results
  );
  let results = (outs);

  let assemblyFormat = [{
    attr-dict ($results^ `:` type($results))?
  }];

  let hasVerifier = 1;
}

#endif // TORCH_OPS
@@ -390,6 +390,9 @@ def AnyTorchListOfTensorType:
def AnyTorchListOfOptionalTensorType :
    ListOf<[AnyTorchOptionalTensorType],
           "Any optional tensor list type (Tensor?[])">;
def AnyTorchListOfOptionalIntType :
    ListOf<[AnyTorchOptionalIntType],
           "List of optional ints type (int?[])">;
def AnyTorchOptionalListOfTorchIntType : OptionalOf<AnyTorchListOfTorchIntType, "Optional torch int list type (int[]?)">;
def AnyTorchOptionalListOfTorchFloatType : OptionalOf<AnyTorchListOfTorchFloatType, "Optional torch float list type (float[]?)">;
// Note: TorchScript does not consider !torch.bool to be a Scalar.
@@ -80,6 +80,9 @@ void createTorchSimplificationPipeline(
/// Creates a pipeline that refines shapes of tensor operations in the program.
void createTorchShapeRefinementPipeline(OpPassManager &pm);

/// Creates a pipeline that refines dtype of tensor operations in the program.
void createTorchDtypeRefinementPipeline(OpPassManager &pm);

std::unique_ptr<OperationPass<ModuleOp>> createAdjustCallingConventionsPass();

std::unique_ptr<OperationPass<func::FuncOp>> createRefineTypesPass();
@@ -102,7 +105,13 @@ std::unique_ptr<OperationPass<ModuleOp>> createReifyShapeCalculationsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createSimplifyShapeCalculationsPass();

std::unique_ptr<OperationPass<func::FuncOp>> createDropShapeCalculationsPass();
std::unique_ptr<OperationPass<ModuleOp>> createReifyDtypeCalculationsPass();

std::unique_ptr<OperationPass<func::FuncOp>>
createSimplifyDtypeCalculationsPass();

std::unique_ptr<OperationPass<func::FuncOp>>
createDropAbstractInterpCalculationsPass();

std::unique_ptr<OperationPass<ModuleOp>>
createEraseModuleInitializerPass();
@@ -113,7 +122,7 @@ createLowerToBackendContractPass(int maxIterations, bool decompose,

std::unique_ptr<OperationPass<ModuleOp>> createVerifyBackendContractPass();

StringRef getShapeLibrary();
StringRef getAbstractInterpLibrary();

} // namespace Torch
@@ -239,7 +239,7 @@ def DecomposeComplexOps : Pass<"torch-decompose-complex-ops", "func::FuncOp"> {
}

def ReifyShapeCalculations : Pass<"torch-reify-shape-calculations", "ModuleOp"> {
  let summary = "Decompose complicated torch operations";
  let summary = "Reify shape calculations.";
  let constructor = "mlir::torch::Torch::createReifyShapeCalculationsPass()";
  let description = [{
  }];
@@ -253,9 +253,23 @@ def SimplifyShapeCalculations : Pass<"torch-simplify-shape-calculations", "func:
  }];
}

def DropShapeCalculations : Pass<"torch-drop-shape-calculations", "func::FuncOp"> {
  let summary = "Drop reified shape calculations.";
  let constructor = "mlir::torch::Torch::createDropShapeCalculationsPass()";
def ReifyDtypeCalculations : Pass<"torch-reify-dtype-calculations", "ModuleOp"> {
  let summary = "Reify dtype calculations.";
  let constructor = "mlir::torch::Torch::createReifyDtypeCalculationsPass()";
  let description = [{
  }];
}

def SimplifyDtypeCalculations : Pass<"torch-simplify-dtype-calculations", "func::FuncOp"> {
  let summary = "Simplify reified dtype calculations.";
  let constructor = "mlir::torch::Torch::createSimplifyDtypeCalculationsPass()";
  let description = [{
  }];
}

def DropAbstractInterpCalculations : Pass<"torch-drop-abstract-interp-calculations", "func::FuncOp"> {
  let summary = "Drop reified abstract interpretation calculations.";
  let constructor = "mlir::torch::Torch::createDropAbstractInterpCalculationsPass()";
  let description = [{
  }];
}
@@ -11,7 +11,6 @@

#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "torch-mlir/Dialect/Torch/Utils/TorchUpstream.h"

namespace mlir {
@@ -35,8 +34,23 @@ Type getTypeForTorchType(
    MLIRContext *context, Type type,
    mlir::IntegerType::SignednessSemantics signedness = IntegerType::Signed);

Type getTorchTypeForScalarType(MLIRContext *context,
                               torch_upstream::ScalarType dtypeInt);
FailureOr<Type> getTorchTypeForScalarType(MLIRContext *context,
                                          torch_upstream::ScalarType dtypeInt);

// This is the type rule used for deciding dtype for:
// 1. A new tensor created from given data.
// 2. The scalar type for type promotion when a scalar is an operand of a tensor
// operation (such as AtenMulScalarOp, AtenAddScalarOp etc)
// If the data is floating-point, the `dtype` is inferred to be the
// default dtype, see `torch.get_default_dtype`.
Type getDefaultDtypeForTorchScalar(Type type);

// This is the type rule used for deciding builtin type for:
// 1. The dtype of the result tensor when converting a Scalar into a Tensor like
// PrimNumToTensorScalarOp.
// 2. The scalar type for type promotion when a scalar is an operand of scalar
// only operation like AtenAddOp.
Type getBuiltInTypeForTorchScalar(Type type);
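For intuition, here is a hedged PyTorch-level illustration of the two rules the comments above describe (a sketch of the observable behavior, not the compiler code); the eager `prim::NumToTensor` call is assumed to behave as it does under TorchScript.

```python
import torch

# Rule behind getDefaultDtypeForTorchScalar: a tensor built from Python float
# data, or a float Scalar promoting against a tensor, uses the default dtype.
print(torch.tensor(2.5).dtype)                           # torch.float32
print((torch.zeros(3, dtype=torch.int64) * 2.5).dtype)   # torch.float32

# Rule behind getBuiltInTypeForTorchScalar: converting the Scalar itself into
# a tensor (prim::NumToTensor) uses the full-width builtin type for the
# Python number (float -> f64, int -> i64).
print(torch.ops.prim.NumToTensor(2.5).dtype)             # torch.float64
```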

Value getDtypeIntValueForType(PatternRewriter &rewriter, Location loc,
                              Type dtype);
@@ -2294,24 +2294,40 @@ OpFoldResult PrimMinSelfIntOp::fold(ArrayRef<Attribute> operands) {
// ShapeCalculateOp
//===----------------------------------------------------------------------===//

void ShapeCalculateOp::getSuccessorRegions(
    Optional<unsigned> index, ArrayRef<Attribute> operands,
    SmallVectorImpl<RegionSuccessor> &regions) {
  (void)operands;

template <typename CalculateOp>
static void
getSuccessorRegionsForCalculateOp(CalculateOp op, Optional<unsigned> index,
                                  ArrayRef<Attribute> operands,
                                  SmallVectorImpl<RegionSuccessor> &regions) {
  if (!index.has_value()) {
    // First thing the op does is branch into the shape calculation.
    regions.emplace_back(&getShapeCalculation());
    // First thing the op does is branch into the calculation.
    regions.emplace_back(&op.getCalculation());
    return;
  }
  if (*index == 0) {
    // Body returns control to the outer op, passing through results.
    regions.emplace_back(getResults());
    regions.emplace_back(op.getResults());
    return;
  }
  assert(*index == 1);
  // Shape calculation branches to the body.
  regions.emplace_back(&getBody());
  // Calculation branches to the body.
  regions.emplace_back(&op.getBody());
}

void ShapeCalculateOp::getSuccessorRegions(
    Optional<unsigned> index, ArrayRef<Attribute> operands,
    SmallVectorImpl<RegionSuccessor> &regions) {
  getSuccessorRegionsForCalculateOp(*this, index, operands, regions);
}

//===----------------------------------------------------------------------===//
// DtypeCalculateOp
//===----------------------------------------------------------------------===//

void DtypeCalculateOp::getSuccessorRegions(
    Optional<unsigned> index, ArrayRef<Attribute> operands,
    SmallVectorImpl<RegionSuccessor> &regions) {
  getSuccessorRegionsForCalculateOp(*this, index, operands, regions);
}

//===----------------------------------------------------------------------===//
@@ -2333,6 +2349,25 @@ LogicalResult ShapeCalculateYieldShapesOp::verify() {
  return success();
}

//===----------------------------------------------------------------------===//
// DtypeCalculateYieldDtypesOp
//===----------------------------------------------------------------------===//

MutableOperandRange DtypeCalculateYieldDtypesOp::getMutableSuccessorOperands(
    Optional<unsigned> index) {
  // The dtype operands don't get forwarded to the body.
  // MutableOperandRange always has an owning operation, even if empty, so
  // create a 0-length range.
  return MutableOperandRange(*this, /*start=*/0, /*length=*/0);
}

LogicalResult DtypeCalculateYieldDtypesOp::verify() {
  auto parent = cast<DtypeCalculateOp>(getOperation()->getParentOp());
  if (parent.getNumResults() != getNumOperands())
    return emitOpError("expected number of dtypes to match number of results");
  return success();
}

//===----------------------------------------------------------------------===//
// GlobalSlotModuleInitializerOp
//===----------------------------------------------------------------------===//
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file is auto-generated! Do not edit!!!
// Generated with the script `build_tools/update_shape_lib.sh`.
// Generated with the script `build_tools/update_abstract_interp_lib.sh`.
//
//===----------------------------------------------------------------------===//
@@ -16,7 +16,7 @@

using namespace mlir;

StringRef mlir::torch::Torch::getShapeLibrary() {
StringRef mlir::torch::Torch::getAbstractInterpLibrary() {
#ifndef _MSC_VER
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Woverlength-strings"
@@ -5470,6 +5470,25 @@ StringRef mlir::torch::Torch::getShapeLibrary() {
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten.tanh\"(%arg0: !torch.int, %arg1: !torch.int) -> !torch.int {\n"
" %int6 = torch.constant.int 6\n"
" %int15 = torch.constant.int 15\n"
" %true = torch.constant.bool true\n"
" %int7 = torch.constant.int 7\n"
" %0 = torch.aten.eq.int %arg1, %int7 : !torch.int, !torch.int -> !torch.bool\n"
" %1 = torch.prim.If %0 -> (!torch.bool) {\n"
" torch.prim.If.yield %true : !torch.bool\n"
" } else {\n"
" %3 = torch.aten.eq.int %arg1, %int15 : !torch.int, !torch.int -> !torch.bool\n"
" torch.prim.If.yield %3 : !torch.bool\n"
" }\n"
" %2 = torch.prim.If %1 -> (!torch.int) {\n"
" torch.prim.If.yield %arg1 : !torch.int\n"
" } else {\n"
" torch.prim.If.yield %int6 : !torch.int\n"
" }\n"
" return %2 : !torch.int\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten.erf\"(%arg0: !torch.list<int>) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
@@ -5618,11 +5637,11 @@ StringRef mlir::torch::Torch::getShapeLibrary() {
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten.to.dtype\"(%arg0: !torch.list<int>, %arg1: !torch.int, %arg2: !torch.bool, %arg3: !torch.bool, %arg4: !torch.optional<int>) -> !torch.list<int> {\n"
" func.func @\"__torch_mlir_shape_fn.prims.convert_element_type\"(%arg0: !torch.list<int>, %arg1: !torch.int) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.prims.convert_element_type\"(%arg0: !torch.list<int>, %arg1: !torch.int) -> !torch.list<int> {\n"
" func.func @\"__torch_mlir_shape_fn.aten.to.dtype\"(%arg0: !torch.list<int>, %arg1: !torch.int, %arg2: !torch.bool, %arg3: !torch.bool, %arg4: !torch.optional<int>) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
@@ -5721,6 +5740,23 @@ StringRef mlir::torch::Torch::getShapeLibrary() {
" %0 = call @__torch__.torch.jit._shape_functions.broadcast(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten.rsub.Scalar\"(%arg0: !torch.int, %arg1: !torch.int, %arg2: !torch.union<float, int>, %arg3: !torch.union<float, int>) -> !torch.int {\n"
" %none = torch.constant.none\n"
" %0 = torch.prim.ListConstruct %arg0, %none : (!torch.int, !torch.none) -> !torch.list<optional<int>>\n"
" %1 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.get_dtype_of_scalar(%arg2) : (!torch.union<float, int>) -> !torch.int\n"
" %2 = torch.prim.ListConstruct %arg1, %1 : (!torch.int, !torch.int) -> !torch.list<int>\n"
" %3 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes(%0, %2) : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
" return %3 : !torch.int\n"
" }\n"
" func.func @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes(%arg0: !torch.list<optional<int>>, %arg1: !torch.list<int>) -> !torch.int {\n"
" %0 = torch.promote_dtypes %arg0, %arg1 : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
" return %0 : !torch.int\n"
" }\n"
" func.func @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.get_dtype_of_scalar(%arg0: !torch.union<float, int>) -> !torch.int {\n"
" %0 = torch.prim.NumToTensor.Scalar %arg0 : !torch.union<float, int> -> !torch.tensor\n"
" %1 = torch.prim.dtype %0 : !torch.tensor -> !torch.int\n"
" return %1 : !torch.int\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten.leaky_relu\"(%arg0: !torch.list<int>, %arg1: !torch.float) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.unary(%arg0) : (!torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
@@ -6414,6 +6450,12 @@ StringRef mlir::torch::Torch::getShapeLibrary() {
" %0 = call @__torch__.torch.jit._shape_functions.broadcast(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten.floor_divide\"(%arg0: !torch.int, %arg1: !torch.int, %arg2: !torch.int, %arg3: !torch.int) -> !torch.int {\n"
" %0 = torch.prim.ListConstruct %arg0, %arg2 : (!torch.int, !torch.int) -> !torch.list<optional<int>>\n"
" %1 = torch.prim.ListConstruct %arg1, %arg3 : (!torch.int, !torch.int) -> !torch.list<int>\n"
" %2 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes(%0, %1) : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
" return %2 : !torch.int\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten.atan2\"(%arg0: !torch.list<int>, %arg1: !torch.list<int>) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.broadcast(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
@ -7086,6 +7128,15 @@ StringRef mlir::torch::Torch::getShapeLibrary() {
" %4 = torch.prim.ListConstruct %0, %1, %2, %3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>\n"
" return %4 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten.add\"(%arg0: !torch.union<float, int>, %arg1: !torch.union<float, int>) -> !torch.int {\n"
" %none = torch.constant.none\n"
" %0 = torch.prim.ListConstruct %none, %none : (!torch.none, !torch.none) -> !torch.list<optional<int>>\n"
" %1 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.get_dtype_of_scalar(%arg0) : (!torch.union<float, int>) -> !torch.int\n"
" %2 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.get_dtype_of_scalar(%arg1) : (!torch.union<float, int>) -> !torch.int\n"
" %3 = torch.prim.ListConstruct %1, %2 : (!torch.int, !torch.int) -> !torch.list<int>\n"
" %4 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes(%0, %3) : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
" return %4 : !torch.int\n"
" }\n"
"}\n"
"";
// clang-format on
|
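The `__torch_mlir_dtype_fn.*` entries above are imported from dtype functions written in Python by the library generator. As a rough guide to what the Python side of one of these looks like, here is a hedged sketch for `aten.rsub.Scalar`; the helpers `promote_dtypes` and `get_dtype_of_scalar` are the ones actually called in the IR above, but the function name, argument names, and registration convention of `abstract_interp_lib_gen.py` are assumptions and only illustrative.

# Illustrative sketch only; not taken verbatim from this commit.
from typing import List, Optional, Union
from torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator import (
    get_dtype_of_scalar, promote_dtypes)

def aten_rsub_Scalar_dtype(self_rank: int, self_dtype: int,
                           other: Union[int, float],
                           alpha: Union[int, float] = 1) -> int:
    # Each tensor operand is passed as a (rank, dtype) pair of ints; scalar
    # operands are passed through as int/float, mirroring the IR above.
    ranks: List[Optional[int]] = [self_rank, None]
    dtypes: List[int] = [self_dtype, get_dtype_of_scalar(other)]
    return promote_dtypes(ranks, dtypes)

The compiled form of this function is exactly the `func.func @"__torch_mlir_dtype_fn.aten.rsub.Scalar"` shown earlier: the rank list, the scalar-dtype query, and the final `torch.promote_dtypes` call correspond line for line.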
@ -1,7 +1,7 @@
add_mlir_library(TorchMLIRTorchPasses
AdjustCallingConventions.cpp
DecomposeComplexOps.cpp
DropShapeCalculations.cpp
DropAbstractInterpCalculations.cpp
EraseModuleInitializer.cpp
Passes.cpp
GlobalizeObjectGraph.cpp

@ -13,8 +13,12 @@ add_mlir_library(TorchMLIRTorchPasses
RefinePublicReturn.cpp
RefineTypes.cpp
ReifyShapeCalculations.cpp
ShapeLibrary.cpp
ReifyDtypeCalculations.cpp
ReifyAbstractInterpCalculationsUtils.cpp
AbstractInterpLibrary.cpp
SimplifyShapeCalculations.cpp
SimplifyDtypeCalculations.cpp
SimplifyAbstractInterpCalculationsUtils.cpp

ADDITIONAL_HEADER_DIRS
${PROJECT_SOURCE_DIR}/include/torch-mlir/Dialect/Torch/Transforms
|
|
|
@ -9,29 +9,22 @@
|
|||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/Parser/Parser.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "mlir/Transforms/InliningUtils.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchDialect.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
||||
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
namespace {
|
||||
class DropShapeCalculateOp : public OpConversionPattern<ShapeCalculateOp> {
|
||||
template <typename CalculateOp>
|
||||
class DropCalculateOp : public OpConversionPattern<CalculateOp> {
|
||||
public:
|
||||
using OpConversionPattern::OpConversionPattern;
|
||||
using OpConversionPattern<CalculateOp>::OpConversionPattern;
|
||||
LogicalResult
|
||||
matchAndRewrite(ShapeCalculateOp op, OpAdaptor adaptor,
|
||||
matchAndRewrite(CalculateOp op, typename CalculateOp::Adaptor adaptor,
|
||||
ConversionPatternRewriter &rewriter) const override {
|
||||
Block *block = &op.getBody().front();
|
||||
Operation *terminator = block->getTerminator();
|
||||
|
@ -45,16 +38,18 @@ public:
|
|||
} // namespace
|
||||
|
||||
namespace {
|
||||
class DropShapeCalculationsPass
|
||||
: public DropShapeCalculationsBase<DropShapeCalculationsPass> {
|
||||
class DropAbstractInterpCalculationsPass
|
||||
: public DropAbstractInterpCalculationsBase<
|
||||
DropAbstractInterpCalculationsPass> {
|
||||
void runOnOperation() override {
|
||||
MLIRContext *context = &getContext();
|
||||
|
||||
RewritePatternSet patterns(context);
|
||||
patterns.insert<DropShapeCalculateOp>(context);
|
||||
patterns.insert<DropCalculateOp<DtypeCalculateOp>>(context);
|
||||
patterns.insert<DropCalculateOp<ShapeCalculateOp>>(context);
|
||||
ConversionTarget target(*context);
|
||||
target.addLegalDialect<Torch::TorchDialect>();
|
||||
target.addIllegalOp<ShapeCalculateOp>();
|
||||
target.addIllegalOp<DtypeCalculateOp, ShapeCalculateOp>();
|
||||
target.addLegalOp<func::FuncOp>();
|
||||
|
||||
if (failed(applyPartialConversion(getOperation(), target,
|
||||
|
@ -66,6 +61,6 @@ class DropShapeCalculationsPass
|
|||
} // namespace
|
||||
|
||||
std::unique_ptr<OperationPass<func::FuncOp>>
|
||||
mlir::torch::Torch::createDropShapeCalculationsPass() {
|
||||
return std::make_unique<DropShapeCalculationsPass>();
|
||||
mlir::torch::Torch::createDropAbstractInterpCalculationsPass() {
|
||||
return std::make_unique<DropAbstractInterpCalculationsPass>();
|
||||
}
|
|
@ -90,8 +90,8 @@ static LogicalResult checkType(Operation *op, Type type,
->emitError(
"unsupported by backend contract: tensor with unknown rank")
.attachNote()
.append("this is likely due to a missing shape transfer function "
"in shape_lib_gen.py");
.append("this is likely due to a missing transfer function "
"in abstract_interp_lib_gen.py");
} else {
return failure();
}
|
|
|
@ -121,6 +121,7 @@ void mlir::torch::Torch::createTorchSimplificationPipeline(
// inference), because Torch type promotion rules actually depend on the shape
// of the operand.
createTorchShapeRefinementPipeline(pm);
createTorchDtypeRefinementPipeline(pm);
// Refine types in the program, which mainly means inferring dtypes of ops.
pm.addNestedPass<func::FuncOp>(Torch::createRefineTypesPass());
// Propagate to ABI return types the shape/dtype information discovered by

@ -137,23 +138,41 @@ void mlir::torch::Torch::createTorchSimplificationPipeline(
}
}

void mlir::torch::Torch::createTorchShapeRefinementPipeline(OpPassManager &pm) {
// Reify the shape functions for each op that is present in the shape library.
pm.addPass(Torch::createReifyShapeCalculationsPass());
static void createRefinementPipeline(
mlir::OpPassManager &pm,
llvm::function_ref<std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>()>
reifyCalculationsPass,
llvm::function_ref<
std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>()>
simplifyCalculationsPass) {
// Reify the library functions for each op that is present in the library.
pm.addPass(reifyCalculationsPass());

// Inline the shape functions to enable analysis and transformation.
// TODO: Only inline shape functions (this will currently inline everything).
pm.addPass(createInlinerPass());
// Inline the library functions to enable analysis and transformation.
// TODO: Only inline library functions (this will currently inline
// everything).
pm.addPass(mlir::createInlinerPass());

// Now, try to simplify shape calculations. This is unfortunately a "optimize
// Now, try to simplify calculations. This is unfortunately a "optimize
// as hard as possible" kind of thing, so it's inherently somewhat brittle.
// The idea is to keep strengthening what we do here to support the shape
// library. We don't need to support arbitrary programs, thankfully.
pm.addNestedPass<func::FuncOp>(Torch::createSimplifyShapeCalculationsPass());
// The idea is to keep strengthening what we do here to support the
// library functions. We don't need to support arbitrary programs, thankfully.
pm.addNestedPass<mlir::func::FuncOp>(simplifyCalculationsPass());
// Run CSE, then see if we can simplify further.
pm.addNestedPass<func::FuncOp>(createCSEPass());
pm.addNestedPass<func::FuncOp>(Torch::createSimplifyShapeCalculationsPass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::createCSEPass());
pm.addNestedPass<mlir::func::FuncOp>(simplifyCalculationsPass());

// Drop shape calculations, leaving behind the shape-refined program.
pm.addNestedPass<func::FuncOp>(Torch::createDropShapeCalculationsPass());
// Drop calculations, leaving behind the refined program.
pm.addNestedPass<mlir::func::FuncOp>(
mlir::torch::Torch::createDropAbstractInterpCalculationsPass());
}

void mlir::torch::Torch::createTorchShapeRefinementPipeline(OpPassManager &pm) {
createRefinementPipeline(pm, Torch::createReifyShapeCalculationsPass,
Torch::createSimplifyShapeCalculationsPass);
}

void mlir::torch::Torch::createTorchDtypeRefinementPipeline(OpPassManager &pm) {
createRefinementPipeline(pm, Torch::createReifyDtypeCalculationsPass,
Torch::createSimplifyDtypeCalculationsPass);
}
|
|
|
@ -491,44 +491,6 @@ private:
};
} // namespace

// This is the type rule used for deciding dtype for:
// 1. A new tensor created from given data.
// 2. The scalar type for type promotion when a scalar is an operand of a tensor
// operation (such as AtenMulScalarOp, AtenAddScalarOp etc)
// If the data is floating-point, the `dtype` is inferred to be the
// default dtype, see `torch.get_default_dtype`.
static Type getDefaultDtypeForTorchScalar(Type type) {
MLIRContext *context = type.getContext();
if (type.isa<Torch::FloatType>()) {
// For now, use float32 which is the initial default dtype returned by
// `torch.get_default_dtype`.
return Float32Type::get(context);
}
if (type.isa<Torch::IntType>())
return IntegerType::get(context, 64, IntegerType::Signed);
if (type.isa<Torch::BoolType>())
return IntegerType::get(context, 1);
llvm_unreachable(
"getDefaultDtypeForTorchScalar called on an unsupported type");
}

// This is the type rule used for deciding builtin type for:
// 1. The dtype of the result tensor when converting a Scalar into a Tensor like
// PrimNumToTensorScalarOp.
// 2. The scalar type for type promotion when a scalar is an operand of scalar
// only operation like AtenAddOp.
static Type getBuiltInTypeForTorchScalar(Type type) {
MLIRContext *context = type.getContext();
if (type.isa<Torch::FloatType>())
return Float64Type::get(context);
if (type.isa<Torch::IntType>())
return IntegerType::get(context, 64, IntegerType::Signed);
if (type.isa<Torch::BoolType>())
return IntegerType::get(context, 1);
llvm_unreachable(
"getBuiltInTypeForTorchScalar called on an unsupported type");
}

static torch_upstream::ResultTypeState
updateResultTypeState(Type scalarType,
const torch_upstream::ResultTypeState &inState) {
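The two scalar rules being deleted here (default dtype vs. builtin type) mirror PyTorch's own promotion behavior, which the replacement dtype functions in the library now have to reproduce. A short PyTorch session illustrating the distinction; this is only an illustration of the expected behavior, not code from this commit:

import torch

# Default-dtype rule: a Python float used as tensor data, or promoted against
# an integer tensor, lands on the default dtype (float32 unless changed).
assert torch.tensor(2.5).dtype == torch.float32
assert (torch.ones(3, dtype=torch.int64) * 2.5).dtype == torch.float32

# A float scalar does not upcast an existing floating-point tensor.
assert (torch.ones(3, dtype=torch.float16) + 2.5).dtype == torch.float16

# Builtin-type rule: when the scalar itself becomes a tensor (prim::NumToTensor),
# it keeps the full-precision builtin type, so a Python float maps to float64.
# (Expected behavior; the exact op exposure via torch.ops may vary by version.)
assert torch.ops.prim.NumToTensor(2.5).dtype == torch.float64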
|
@ -583,8 +545,11 @@ static Type getPromotedResultScalarType(ArrayRef<Type> scalarTypes) {
state =
updateResultTypeState(getBuiltInTypeForTorchScalar(scalarType), state);
}
return getTorchTypeForScalarType(scalarTypes[0].getContext(),
result_type(state));
FailureOr<Type> result = getTorchTypeForScalarType(
scalarTypes[0].getContext(), result_type(state));
if (failed(result))
return Type();
return *result;
}

// Returns most generic type Type() if the tensor dtype is unknown.
|
@ -707,9 +672,9 @@ void TypeAnalysis::visitOperation(Operation *op,
}

// Dtype is always float32, except for bfloat16, float16, float64 and nullptr.
if (isa<AtenTanhOp, AtenExpOp, AtenExpm1Op, AtenSinOp, AtenCosOp,
AtenSigmoidOp, AtenReciprocalOp, AtenLogOp, AtenSqrtOp, AtenLog2Op,
AtenLog1pOp, AtenRsqrtOp, AtenErfOp, AtenSoftplusOp, AtenFrobeniusNormDimOp>(op)) {
if (isa<AtenExpOp, AtenExpm1Op, AtenSinOp, AtenCosOp, AtenSigmoidOp,
AtenReciprocalOp, AtenLogOp, AtenSqrtOp, AtenLog2Op, AtenLog1pOp,
AtenRsqrtOp, AtenErfOp, AtenSoftplusOp, AtenFrobeniusNormDimOp>(op)) {
ValueKnowledge knowledge =
ValueKnowledge::getTensorPessimisticValueState(op->getContext());
Type dtype = operands[0]->getValue().dtype;
|
@ -770,7 +735,7 @@ void TypeAnalysis::visitOperation(Operation *op,
if (isa<AtenAddTensorOp, AtenSubTensorOp, AtenMulTensorOp, AtenDivTensorOp,
AtenDivTensorModeOp, Aten__And__TensorOp, AtenMinimumOp,
AtenMaximumOp, AtenBitwiseAndTensorOp, AtenBitwiseOrTensorOp,
AtenThresholdBackwardOp, AtenFloorDivideOp>(op)) {
AtenThresholdBackwardOp>(op)) {
auto knowledge =
ValueKnowledge::getTensorPessimisticValueState(op->getContext());
knowledge.dtype = getPromotedResultType(
|
@ -816,7 +781,7 @@ void TypeAnalysis::visitOperation(Operation *op,
// Promote LHS with scalar RHS.
if (isa<AtenAddScalarOp, AtenSubScalarOp, AtenMulScalarOp, AtenDivScalarOp,
AtenFmodScalarOp, AtenFloorDivideScalarOp, AtenPowTensorScalarOp,
AtenRsubScalarOp, AtenLeakyReluOp, AtenRemainderScalarOp>(op)) {
AtenLeakyReluOp, AtenRemainderScalarOp>(op)) {
auto lhs = operands[0]->getValue();
Value scalar = op->getOperand(1);
auto knowledge =
|
|
|
@ -0,0 +1,290 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ReifyAbstractInterpCalculationsUtils.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
static std::string getLibraryFunctionPrefix(LibraryFunctionKind libFuncKind) {
|
||||
if (libFuncKind == LibraryFunctionKind::ShapeFunction)
|
||||
return "__torch_mlir_shape_fn.";
|
||||
else if (libFuncKind == LibraryFunctionKind::DtypeFunction)
|
||||
return "__torch_mlir_dtype_fn.";
|
||||
llvm_unreachable(
|
||||
"`getLibraryFunctionPrefix` called with an unsupported `CalculateOp`");
|
||||
}
|
||||
|
||||
static Operation *createCalculateOp(OpBuilder &b, Location loc,
|
||||
TypeRange resultTypes,
|
||||
LibraryFunctionKind libFuncKind) {
|
||||
if (libFuncKind == LibraryFunctionKind::ShapeFunction)
|
||||
return b.create<ShapeCalculateOp>(loc, resultTypes);
|
||||
else if (libFuncKind == LibraryFunctionKind::DtypeFunction)
|
||||
return b.create<DtypeCalculateOp>(loc, resultTypes);
|
||||
llvm_unreachable(
|
||||
"`createCalculateOp` called with an unsupported `LibraryFunctionKind`");
|
||||
}
|
||||
|
||||
static Operation *createCalculateYieldOp(OpBuilder &b, Location loc,
|
||||
ValueRange results,
|
||||
LibraryFunctionKind libFuncKind) {
|
||||
if (libFuncKind == LibraryFunctionKind::ShapeFunction)
|
||||
return b.create<ShapeCalculateYieldOp>(loc, results);
|
||||
else if (libFuncKind == LibraryFunctionKind::DtypeFunction)
|
||||
return b.create<DtypeCalculateYieldOp>(loc, results);
|
||||
llvm_unreachable("`createCalculateYieldOp` called with an unsupported "
|
||||
"`LibraryFunctionKind`");
|
||||
}
|
||||
|
||||
static Operation *
|
||||
createCalculateYieldCalculationOp(OpBuilder &b, Location loc,
|
||||
ValueRange results,
|
||||
LibraryFunctionKind libFuncKind) {
|
||||
if (libFuncKind == LibraryFunctionKind::ShapeFunction)
|
||||
return b.create<ShapeCalculateYieldShapesOp>(loc, results);
|
||||
else if (libFuncKind == LibraryFunctionKind::DtypeFunction)
|
||||
return b.create<DtypeCalculateYieldDtypesOp>(loc, results);
|
||||
llvm_unreachable("`createCalculateYieldCalculationOp` called with an "
|
||||
"unsupported `LibraryFunctionKind`");
|
||||
}
|
||||
|
||||
LogicalResult Torch::wrapWithCalculateOpIfLibraryFunctionAvailable(
|
||||
Operation *op, ModuleOp library, LibraryFunctionKind libFuncKind,
|
||||
SmallVector<std::string> &libFuncNamesUsed,
|
||||
function_ref<FailureOr<SmallVector<Value>>(OpBuilder &, Location,
|
||||
ValueRange, func::FuncOp)>
|
||||
libFuncArgsBuilder) {
|
||||
Location loc = op->getLoc();
|
||||
MLIRContext *context = op->getContext();
|
||||
auto name = op->getName().stripDialect();
|
||||
// For value-semantic variant ops, i.e. valsem-ops (ops that are
|
||||
// mechanically consistent with existing torch conventions of in-place vs.
|
||||
// out-of-place (value-semantic) variants), remove the prefix when
|
||||
// looking them up in the library.
|
||||
if (name.startswith("valsem."))
|
||||
name = name.drop_front(strlen("valsem."));
|
||||
std::string libFuncName =
|
||||
(getLibraryFunctionPrefix(libFuncKind) + Twine(name)).str();
|
||||
auto libFunc = library.lookupSymbol<func::FuncOp>(libFuncName);
|
||||
if (!libFunc)
|
||||
return success();
|
||||
libFuncNamesUsed.push_back(libFuncName);
|
||||
OpBuilder b(op);
|
||||
Operation *calculate =
|
||||
createCalculateOp(b, loc, op->getResultTypes(), libFuncKind);
|
||||
op->replaceAllUsesWith(calculate);
|
||||
{
|
||||
// Move the op into the body of the `torch.{libFuncType}.calculate` op
|
||||
// and yield its results.
|
||||
OpBuilder b(context);
|
||||
Block *bodyBlock = b.createBlock(&calculate->getRegion(0));
|
||||
op->moveBefore(bodyBlock, bodyBlock->end());
|
||||
b.setInsertionPointAfter(op);
|
||||
createCalculateYieldOp(b, loc, op->getResults(), libFuncKind);
|
||||
}
|
||||
|
||||
{
|
||||
OpBuilder b(context);
|
||||
b.createBlock(&calculate->getRegion(1));
|
||||
// Create the call to the library function!
|
||||
FailureOr<SmallVector<Value>> libFuncArgs =
|
||||
libFuncArgsBuilder(b, loc, op->getOperands(), libFunc);
|
||||
if (failed(libFuncArgs))
|
||||
return failure();
|
||||
auto call = b.create<mlir::func::CallOp>(loc, libFunc, *libFuncArgs);
|
||||
|
||||
// Python models multiple results with a tuple, so we need to unpack it
|
||||
// if the op has multiple results.
|
||||
SmallVector<Value> unpackedResults;
|
||||
assert(call.getNumResults() == 1 &&
|
||||
"Multiple results are packed in a tuple in Python!");
|
||||
Value result = call.getResult(0);
|
||||
if (auto tupleType = result.getType().dyn_cast<Torch::TupleType>()) {
|
||||
auto unpack = b.create<PrimTupleUnpackOp>(
|
||||
loc, tupleType.getContainedTypes(), result);
|
||||
llvm::append_range(unpackedResults, unpack.getResults());
|
||||
} else {
|
||||
unpackedResults.push_back(result);
|
||||
}
|
||||
|
||||
// Terminate the region.
|
||||
createCalculateYieldCalculationOp(b, loc, unpackedResults, libFuncKind);
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
void Torch::importLibraryFunctions(ModuleOp module, ModuleOp library,
|
||||
SmallVector<std::string> functionsNeeded) {
|
||||
// Import just the functions we need. This includes transitive callees,
|
||||
// so we use a worklist algorithm.
|
||||
llvm::StringSet<> importedFunctions;
|
||||
while (!functionsNeeded.empty()) {
|
||||
std::string symName = functionsNeeded.pop_back_val();
|
||||
if (importedFunctions.contains(symName))
|
||||
continue;
|
||||
auto func = library.lookupSymbol<func::FuncOp>(symName);
|
||||
assert(func && "broken library");
|
||||
// Move the function from the library to the module this pass
|
||||
// is running on. (this mutates the library, but we re-parse it each time
|
||||
// so this is safe to do).
|
||||
func->moveBefore(&module.getBody()->front());
|
||||
// Set the visibility to private so that the functions go away
|
||||
// nicely after we are done with them.
|
||||
func.setVisibility(SymbolTable::Visibility::Private);
|
||||
// Continue the DFS.
|
||||
importedFunctions.insert(symName);
|
||||
func.walk([&](func::CallOp op) {
|
||||
functionsNeeded.push_back(op.getCallee().str());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
FailureOr<Value> Torch::adjustFunctionArg(
|
||||
OpBuilder &b, Location loc, Value operand, Type desiredType,
|
||||
function_ref<Value(OpBuilder &, Location, Value, Type)> baseTransformation) {
|
||||
operand = baseTransformation(b, loc, operand, desiredType);
|
||||
|
||||
// No need for adjustment if they already match.
|
||||
auto operandType = operand.getType();
|
||||
if (operandType == desiredType)
|
||||
return operand;
|
||||
|
||||
if (desiredType.isa<Torch::AnyType>()) {
|
||||
// Generator's are currently passed as Any because TorchScript cannot
|
||||
// compile a function with Generator type arguments.
|
||||
// Ignoring that hack, this is a correct handling of Any type should we need
|
||||
// to actually support it in the future.
|
||||
return b.create<DerefineOp>(loc, desiredType, operand).getResult();
|
||||
}
|
||||
|
||||
// !torch.union<int, float> is the type used for `Scalar` inputs. At
|
||||
// compile time, such inputs will usually be resolved to an `int` or a `float`
|
||||
// so we need to derefine to match the library function signature.
|
||||
if (auto unionType = desiredType.dyn_cast<Torch::UnionType>()) {
|
||||
if (llvm::all_of(unionType.getContainedTypes(), [](Type containedType) {
|
||||
return containedType.isa<Torch::IntType, Torch::FloatType>();
|
||||
}))
|
||||
return b.create<DerefineOp>(loc, desiredType, operand).getResult();
|
||||
}
|
||||
|
||||
// If the operand is NoneType, then we just need to derefine it to the
|
||||
// optional type in the function signature.
|
||||
if (operandType.isa<Torch::NoneType>()) {
|
||||
assert(desiredType.isa<Torch::OptionalType>() &&
|
||||
"Don't expect library functions to have NoneType parameters");
|
||||
return b.create<DerefineOp>(loc, desiredType, operand).getResult();
|
||||
}
|
||||
|
||||
// If the operand type is statically !torch.optional, then we need to do
|
||||
// different things for the None and non-None cases.
|
||||
// For the None case, we just need to derefine it to the desired type.
|
||||
// For the non-None case, we need to unwrap the optional type and then adjust
|
||||
// it recursively (which also takes care of derefining it to ultimate desired
|
||||
// type).
|
||||
// A case where this happens is `!torch.optional<vtensor>` ->
|
||||
// `!torch.optional<list<int>>>`.
|
||||
if (auto operandOptionalType = operandType.dyn_cast<Torch::OptionalType>()) {
|
||||
if (desiredType.isa<Torch::OptionalType>()) {
|
||||
// if optional is None:
|
||||
// return derefine(None)
|
||||
// else:
|
||||
// return adjust(unchecked_cast(optional))
|
||||
auto none = b.create<ConstantNoneOp>(loc);
|
||||
auto isNone = b.create<Aten__Is__Op>(loc, operand, none);
|
||||
auto primIf = b.create<PrimIfOp>(loc, desiredType, isNone);
|
||||
{
|
||||
Region &thenRegion = primIf.getThenRegion();
|
||||
b.createBlock(&thenRegion, thenRegion.end());
|
||||
auto derefineNone = b.create<DerefineOp>(loc, desiredType, none);
|
||||
b.create<PrimIfYieldOp>(loc, ValueRange{derefineNone});
|
||||
}
|
||||
{
|
||||
Region &elseRegion = primIf.getElseRegion();
|
||||
b.createBlock(&elseRegion, elseRegion.end());
|
||||
auto downcasted = b.create<PrimUncheckedCastOp>(
|
||||
loc, operandOptionalType.getContainedType(), operand);
|
||||
FailureOr<Value> adjusted = adjustFunctionArg(
|
||||
b, loc, downcasted, desiredType, baseTransformation);
|
||||
if (failed(adjusted))
|
||||
return failure();
|
||||
b.create<PrimIfYieldOp>(loc, *adjusted);
|
||||
}
|
||||
b.setInsertionPointAfter(primIf);
|
||||
return primIf.getResult(0);
|
||||
}
|
||||
}
|
||||
|
||||
// If the desired type is OptionalType, then recursively adjust the operand to
|
||||
// the contained type, then derefine it to `!torch.optional`. For example,
|
||||
// `!torch.vtensor -> !torch.optional<list<int>>>`.
|
||||
if (auto desiredOptionalType = desiredType.dyn_cast<Torch::OptionalType>()) {
|
||||
FailureOr<Value> adjusted = adjustFunctionArg(
|
||||
b, loc, operand, desiredOptionalType.getContainedType(),
|
||||
baseTransformation);
|
||||
if (failed(adjusted))
|
||||
return failure();
|
||||
return b.create<DerefineOp>(loc, desiredType, *adjusted).getResult();
|
||||
}
|
||||
|
||||
if (auto desiredListType = desiredType.dyn_cast<Torch::ListType>()) {
|
||||
// Pseudocode:
|
||||
//
|
||||
// operand = ...
|
||||
// adjusted_list = []
|
||||
// for i in range(len(operand)):
|
||||
// adjusted_list.append(adjust(operand[i]))
|
||||
// return adjusted_list
|
||||
auto providedType = operand.getType().cast<Torch::ListType>();
|
||||
Value adjustedList =
|
||||
b.create<PrimListConstructOp>(loc, desiredListType, ValueRange({}));
|
||||
// Create a for-like PrimLoopOp.
|
||||
Value maxTripCount = b.create<AtenLenTOp>(loc, operand);
|
||||
Value cTrue = b.create<Torch::ConstantBoolOp>(loc, true);
|
||||
auto loop = b.create<PrimLoopOp>(loc, TypeRange({}), maxTripCount,
|
||||
/*initialCondition=*/cTrue,
|
||||
/*iterArgsInit=*/ValueRange({}));
|
||||
|
||||
// Create the loop body.
|
||||
{
|
||||
OpBuilder::InsertionGuard guard(b);
|
||||
Block *body =
|
||||
b.createBlock(&loop.getRegion(), loop.getRegion().begin(),
|
||||
TypeRange({b.getType<Torch::IntType>()}), {loc});
|
||||
Value iterationNumber = body->getArgument(0);
|
||||
Value element = b.create<Aten__Getitem__TOp>(
|
||||
loc, providedType.getContainedType(), operand, iterationNumber);
|
||||
FailureOr<Value> adjustedElement =
|
||||
adjustFunctionArg(b, loc, element, desiredListType.getContainedType(),
|
||||
baseTransformation);
|
||||
if (failed(adjustedElement))
|
||||
return failure();
|
||||
b.create<AtenAppendTOp>(loc, adjustedList.getType(), adjustedList,
|
||||
*adjustedElement);
|
||||
b.create<PrimLoopConditionOp>(loc, /*shouldContinue=*/cTrue,
|
||||
/*iterArgs=*/ValueRange({}));
|
||||
}
|
||||
|
||||
return adjustedList;
|
||||
}
|
||||
|
||||
// The library functions use `float` where the operator
|
||||
// signature uses `Scalar` (see comments in torch_ods_gen.py for
|
||||
// explanation).
|
||||
if (desiredType.isa<Torch::FloatType>() &&
|
||||
operand.getType().isa<Torch::IntType>()) {
|
||||
return b.create<AtenFloatScalarOp>(loc, desiredType, operand).getResult();
|
||||
}
|
||||
|
||||
// Pass the operand as-is.
|
||||
return operand;
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef TORCHMLIR_DIALECT_TORCH_TRANSFORMS_REIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
||||
#define TORCHMLIR_DIALECT_TORCH_TRANSFORMS_REIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
||||
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/IR/OperationSupport.h"
|
||||
#include "mlir/Support/LogicalResult.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace torch {
|
||||
namespace Torch {
|
||||
|
||||
enum class LibraryFunctionKind { ShapeFunction, DtypeFunction, Decomposition };
|
||||
|
||||
// Searches the function library for an abstract interpretation function for
|
||||
// `op`. If one is found, wraps the op in a `CalculateOp`, with the op placed in
|
||||
// the first region, and a call to the abstract interpretation function is
|
||||
// inserted into the second region.
|
||||
//
|
||||
// Note: this returns success if no abstract interpretation function is found,
|
||||
// since some abstract interpretation functions (such as decompositions) are
|
||||
// optional.
|
||||
//
|
||||
// Note: This function does *not* import the abstract interpretation function
|
||||
// from the library into the IR.
|
||||
LogicalResult wrapWithCalculateOpIfLibraryFunctionAvailable(
|
||||
Operation *op, ModuleOp library, LibraryFunctionKind funcKind,
|
||||
SmallVector<std::string> &libFuncNamesUsed,
|
||||
function_ref<FailureOr<SmallVector<Value>>(OpBuilder &, Location,
|
||||
ValueRange, func::FuncOp)>
|
||||
libFuncArgsBuilder);
|
||||
|
||||
// Imports the functions in `functionsNeeded` from the library into the module.
|
||||
// This function assumes that all functions needed exist in the library.
|
||||
//
|
||||
// Note: This function modifies the library.
|
||||
void importLibraryFunctions(ModuleOp module, ModuleOp library,
|
||||
SmallVector<std::string> functionsNeeded);
|
||||
|
||||
// Recursively adjust `operand` to match `desiredType`.
|
||||
//
|
||||
// This function by default handles a few types such as `UnionType`,
|
||||
// `OptionalType`, and `ListType`, to name a few. Handling of base element types
|
||||
// can be customized by defining `baseTransformation`, which gets called at the
|
||||
// beginning of each recursive call. This function can be thought of as mapping
|
||||
// `baseTransformation` across `UnionType/OptionalType/ListType`.
|
||||
FailureOr<Value> adjustFunctionArg(
|
||||
OpBuilder &b, Location loc, Value operand, Type desiredType,
|
||||
function_ref<Value(OpBuilder &, Location, Value, Type)> baseTransformation =
|
||||
[](OpBuilder &, Location, Value operand, Type) { return operand; });
|
||||
} // namespace Torch
|
||||
} // namespace torch
|
||||
} // namespace mlir
|
||||
|
||||
#endif // TORCHMLIR_DIALECT_TORCH_TRANSFORMS_REIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
|
@ -0,0 +1,136 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "ReifyAbstractInterpCalculationsUtils.h"
|
||||
#include "mlir/Parser/Parser.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
static bool isTensorTypeOrWrappedTensorType(Type type) {
|
||||
// Allowing tuples as arguments to dtype calculation functions can cause
|
||||
// issues. For example, if an argument is a tuple of tensors and ints, there
|
||||
// would be no way of differentiating the original ints from the ints created
|
||||
// to represent the dtype and rank of the tensors. Therefore, to avoid this
|
||||
// and keep things simple, the tuple type is not allowed. This works well in
|
||||
// practice, since PyTorch op signatures don't seem to take tuples as inputs.
|
||||
assert(!type.isa<Torch::TupleType>() &&
|
||||
"dtype calculation functions are expected to not have tuples of "
|
||||
"tensors as arguments");
|
||||
|
||||
if (type.isa<Torch::BaseTensorType>())
|
||||
return true;
|
||||
|
||||
if (auto optionalType = type.dyn_cast<Torch::OptionalType>()) {
|
||||
return isTensorTypeOrWrappedTensorType(optionalType.getContainedType());
|
||||
} else if (auto listType = type.dyn_cast<Torch::ListType>()) {
|
||||
return isTensorTypeOrWrappedTensorType(listType.getContainedType());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Massage the op operands to match the dtype function signature.
|
||||
// The dtype function generally takes the same operands as the op, with a few
|
||||
// systematic modifications, such as replacing tensors with a rank and dtype
|
||||
// argument.
|
||||
static FailureOr<SmallVector<Value>>
|
||||
dtypeFunctionArgsBuilder(OpBuilder &b, Location loc,
|
||||
ValueRange originalOperands, func::FuncOp dtypeFunc) {
|
||||
// Turns a tensor operand into an operand representing the rank of the tensor
|
||||
auto rankArgAdjuster = [](OpBuilder &b, Location loc, Value operand,
|
||||
Type desiredType) -> Value {
|
||||
if (desiredType.isa<Torch::IntType>() &&
|
||||
operand.getType().isa<Torch::BaseTensorType>()) {
|
||||
auto sizeListType =
|
||||
Torch::ListType::get(Torch::IntType::get(b.getContext()));
|
||||
Value size = b.create<AtenSizeOp>(loc, sizeListType, operand);
|
||||
return b.create<AtenLenTOp>(loc, desiredType, size);
|
||||
}
|
||||
return operand;
|
||||
};
|
||||
|
||||
// Turns a tensor operand into an operand representing the dtype of the tensor
|
||||
auto dtypeArgAdjuster = [](OpBuilder &b, Location loc, Value operand,
|
||||
Type desiredType) -> Value {
|
||||
if (desiredType.isa<Torch::IntType>() &&
|
||||
operand.getType().isa<Torch::BaseTensorType>()) {
|
||||
return b.create<PrimDtypeOp>(loc, desiredType, operand);
|
||||
}
|
||||
return operand;
|
||||
};
|
||||
|
||||
SmallVector<Value> dtypeFuncArgs;
|
||||
ArrayRef<Type> desiredTypes = dtypeFunc.getArgumentTypes();
|
||||
for (auto operand : originalOperands) {
|
||||
assert(!desiredTypes.empty() &&
|
||||
"`dtypeFunc` should have at least one argument for each argument in "
|
||||
"`originalOperands`");
|
||||
Type desiredType = desiredTypes.front();
|
||||
if (isTensorTypeOrWrappedTensorType(operand.getType())) {
|
||||
assert(desiredTypes.size() >= 2 &&
|
||||
"`dtypeFunc` should have two arguments for each tensor argument "
|
||||
"in `originalOperands`");
|
||||
FailureOr<Value> rankArg, dtypeArg;
|
||||
if (failed(rankArg = adjustFunctionArg(b, loc, operand, desiredType,
|
||||
rankArgAdjuster)))
|
||||
return failure();
|
||||
desiredTypes = desiredTypes.drop_front();
|
||||
desiredType = desiredTypes.front();
|
||||
if (failed(dtypeArg = adjustFunctionArg(b, loc, operand, desiredType,
|
||||
dtypeArgAdjuster)))
|
||||
return failure();
|
||||
dtypeFuncArgs.append({*rankArg, *dtypeArg});
|
||||
} else {
|
||||
FailureOr<Value> otherArg;
|
||||
if (failed(otherArg = adjustFunctionArg(b, loc, operand, desiredType)))
|
||||
return failure();
|
||||
dtypeFuncArgs.push_back(*otherArg);
|
||||
}
|
||||
desiredTypes = desiredTypes.drop_front();
|
||||
}
|
||||
|
||||
return dtypeFuncArgs;
|
||||
}
|
||||
|
||||
namespace {
|
||||
class ReifyDtypeCalculationsPass
|
||||
: public ReifyDtypeCalculationsBase<ReifyDtypeCalculationsPass> {
|
||||
void runOnOperation() override {
|
||||
MLIRContext *context = &getContext();
|
||||
ModuleOp module = getOperation();
|
||||
OwningOpRef<ModuleOp> library =
|
||||
parseSourceString<ModuleOp>(getAbstractInterpLibrary(), context);
|
||||
|
||||
// Walk all the operations, and if we have a dtype function, wrap the op
|
||||
// in a `torch.dtype.calculate` op.
|
||||
SmallVector<std::string> functionsNeeded;
|
||||
WalkResult walkResult = module.walk([&](Operation *op) -> WalkResult {
|
||||
return wrapWithCalculateOpIfLibraryFunctionAvailable(
|
||||
op, *library, LibraryFunctionKind::DtypeFunction, functionsNeeded,
|
||||
dtypeFunctionArgsBuilder);
|
||||
});
|
||||
|
||||
if (walkResult.wasInterrupted())
|
||||
return signalPassFailure();
|
||||
importLibraryFunctions(module, *library, std::move(functionsNeeded));
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
Torch::createReifyDtypeCalculationsPass() {
|
||||
return std::make_unique<ReifyDtypeCalculationsPass>();
|
||||
}
|
|
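The `dtypeFunctionArgsBuilder` above fixes the calling convention for dtype functions: every tensor operand of the op is expanded into two `!torch.int` arguments (its rank, then its dtype), while every other operand is passed through `adjustFunctionArg` unchanged. A hedged Python-level sketch of what that convention implies for an op such as `aten.floor_divide(Tensor self, Tensor other)`; the argument names are illustrative, but the four-int signature matches the `__torch_mlir_dtype_fn.aten.floor_divide` function shown earlier in this diff:

# Illustrative sketch only; naming convention assumed, not copied from the
# generator script.
from torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator import (
    promote_dtypes)

def aten_floor_divide_dtype(self_rank: int, self_dtype: int,
                            other_rank: int, other_dtype: int) -> int:
    # Each tensor operand arrives as a (rank, dtype) pair, produced on the
    # C++ side by rankArgAdjuster (aten.size + aten.len) and dtypeArgAdjuster
    # (prim.dtype).
    ranks = [self_rank, other_rank]
    dtypes = [self_dtype, other_dtype]
    return promote_dtypes(ranks, dtypes)

By contrast, the shape-function builder that follows replaces each tensor operand with a single `!torch.list<int>` holding its sizes, which is why shape and dtype functions for the same op have different arities.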
@ -9,209 +9,49 @@
|
|||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "ReifyAbstractInterpCalculationsUtils.h"
|
||||
#include "mlir/Parser/Parser.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "mlir/Transforms/InliningUtils.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchDialect.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
||||
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
static Value adjustShapeFunctionArg(Value operand, Type desiredType,
|
||||
OpBuilder &b, Location loc);
|
||||
|
||||
static Value adjustListArg(Value operand, Torch::ListType desiredType,
|
||||
OpBuilder &b, Location loc) {
|
||||
auto providedType = operand.getType().cast<Torch::ListType>();
|
||||
|
||||
// Pseudocode:
|
||||
//
|
||||
// operand = ...
|
||||
// adjusted_list = []
|
||||
// for i in range(len(operand)):
|
||||
// adjusted_list.append(adjust(operand[i]))
|
||||
// return adjusted_list
|
||||
Value adjustedList =
|
||||
b.create<PrimListConstructOp>(loc, desiredType, ValueRange({}));
|
||||
// Create a for-like PrimLoopOp.
|
||||
Value maxTripCount = b.create<AtenLenTOp>(loc, operand);
|
||||
Value cTrue = b.create<Torch::ConstantBoolOp>(loc, true);
|
||||
auto loop = b.create<PrimLoopOp>(loc, TypeRange({}), maxTripCount,
|
||||
/*initialCondition=*/cTrue,
|
||||
/*iterArgsInit=*/ValueRange({}));
|
||||
|
||||
// Create the loop body.
|
||||
{
|
||||
OpBuilder::InsertionGuard guard(b);
|
||||
Block *body =
|
||||
b.createBlock(&loop.getRegion(), loop.getRegion().begin(),
|
||||
TypeRange({b.getType<Torch::IntType>()}), {loc});
|
||||
Value iterationNumber = body->getArgument(0);
|
||||
Value element = b.create<Aten__Getitem__TOp>(
|
||||
loc, providedType.getContainedType(), operand, iterationNumber);
|
||||
Value adjustedElement =
|
||||
adjustShapeFunctionArg(element, desiredType.getContainedType(), b, loc);
|
||||
b.create<AtenAppendTOp>(loc, adjustedList.getType(), adjustedList,
|
||||
adjustedElement);
|
||||
b.create<PrimLoopConditionOp>(loc, /*shouldContinue=*/cTrue,
|
||||
/*iterArgs=*/ValueRange({}));
|
||||
}
|
||||
|
||||
return adjustedList;
|
||||
}
|
||||
|
||||
static Value adjustShapeFunctionArg(Value operand, Type desiredType,
|
||||
OpBuilder &b, Location loc) {
|
||||
auto operandType = operand.getType();
|
||||
|
||||
// No need for adjustment if they already match.
|
||||
if (operandType == desiredType)
|
||||
return operand;
|
||||
|
||||
if (desiredType.isa<Torch::AnyType>()) {
|
||||
// Generator's are currently passed as Any because TorchScript cannot
|
||||
// compile a function with Generator type arguments.
|
||||
// Ignoring that hack, this is a correct handling of Any type should we need
|
||||
// to actually support it in the future.
|
||||
return b.create<DerefineOp>(loc, desiredType, operand);
|
||||
}
|
||||
|
||||
// If the operand is NoneType, then we just need to derefine it to the
|
||||
// optional type in the shape function signature.
|
||||
if (operandType.isa<Torch::NoneType>()) {
|
||||
assert(desiredType.isa<Torch::OptionalType>() &&
|
||||
"Don't expect shape functions to have NoneType parameters");
|
||||
return b.create<DerefineOp>(loc, desiredType, operand);
|
||||
}
|
||||
|
||||
// If the operand type is statically !torch.optional, then we need to do
|
||||
// different things for the None and non-None cases.
|
||||
// For the None case, we just need to derefine it to the desired type.
|
||||
// For the non-None case, we need to unwrap the optional type and then adjust
|
||||
// it recursively (which also takes care of derefining it to ultimate desired
|
||||
// type).
|
||||
// A case where this happens is `!torch.optional<vtensor>` ->
|
||||
// `!torch.optional<list<int>>>`.
|
||||
if (auto operandOptionalType = operandType.dyn_cast<Torch::OptionalType>()) {
|
||||
if (desiredType.isa<Torch::OptionalType>()) {
|
||||
// if optional is None:
|
||||
// return derefine(None)
|
||||
// else:
|
||||
// return adjust(unchecked_cast(optional))
|
||||
auto none = b.create<ConstantNoneOp>(loc);
|
||||
auto isNone = b.create<Aten__Is__Op>(loc, operand, none);
|
||||
auto primIf = b.create<PrimIfOp>(loc, desiredType, isNone);
|
||||
{
|
||||
Region &thenRegion = primIf.getThenRegion();
|
||||
b.createBlock(&thenRegion, thenRegion.end());
|
||||
auto derefineNone = b.create<DerefineOp>(loc, desiredType, none);
|
||||
b.create<PrimIfYieldOp>(loc, ValueRange{derefineNone});
|
||||
}
|
||||
{
|
||||
Region &elseRegion = primIf.getElseRegion();
|
||||
b.createBlock(&elseRegion, elseRegion.end());
|
||||
auto downcasted = b.create<PrimUncheckedCastOp>(
|
||||
loc, operandOptionalType.getContainedType(), operand);
|
||||
auto adjusted = adjustShapeFunctionArg(downcasted, desiredType, b, loc);
|
||||
b.create<PrimIfYieldOp>(loc, adjusted);
|
||||
}
|
||||
b.setInsertionPointAfter(primIf);
|
||||
return primIf.getResult(0);
|
||||
}
|
||||
}
|
||||
|
||||
// If the desired type is OptionalType, then recursively adjust the operand to
|
||||
// the contained type, then derefine it to `!torch.optional`. For example,
|
||||
// `!torch.vtensor -> !torch.optional<list<int>>>`.
|
||||
if (auto desiredOptionalType = desiredType.dyn_cast<Torch::OptionalType>()) {
|
||||
auto adjusted = adjustShapeFunctionArg(
|
||||
operand, desiredOptionalType.getContainedType(), b, loc);
|
||||
return b.create<DerefineOp>(loc, desiredType, adjusted);
|
||||
}
|
||||
|
||||
// The shape library functions have tensor operands replaced with
|
||||
// `!torch.list<int>` types for the shape. Get the sizes.
|
||||
if (operand.getType().isa<Torch::BaseTensorType>()) {
|
||||
assert(desiredType.isa<Torch::ListType>() &&
|
||||
"Don't expect shape functions to have tensor parameters");
|
||||
return b.create<AtenSizeOp>(loc, desiredType, operand);
|
||||
}
|
||||
|
||||
// Run this after `operand.getType().isa<Torch::BaseTensorType>()` so that
|
||||
// `!torch.vtensor` -> `!torch.list<int>` is handled there specially
|
||||
// first.
|
||||
if (auto desiredListType = desiredType.dyn_cast<Torch::ListType>()) {
|
||||
return adjustListArg(operand, desiredListType, b, loc);
|
||||
}
|
||||
|
||||
// The shape library functions use `float` where the operator
|
||||
// signature uses `Scalar` (see comments in torch_ods_gen.py for
|
||||
// explanation).
|
||||
if (desiredType.isa<Torch::FloatType>() &&
|
||||
operand.getType().isa<Torch::IntType>()) {
|
||||
return b.create<AtenFloatScalarOp>(loc, desiredType, operand);
|
||||
}
|
||||
|
||||
// Pass the operand as-is.
|
||||
return operand;
|
||||
}
|
||||
|
||||
// Populates the shape calculation region with a call to the shape function
|
||||
// from the shape library.
|
||||
static LogicalResult
|
||||
populateShapeCalculationRegion(ShapeCalculateOp op, ValueRange originalOperands,
|
||||
mlir::func::FuncOp shapeFunction) {
|
||||
// Create a call to the shape function in the `shapeCalculation` region.
|
||||
// We will import the callee from the shape library later.
|
||||
OpBuilder b(op.getContext());
|
||||
Location loc = op->getLoc();
|
||||
b.createBlock(&op.getShapeCalculation());
|
||||
static FailureOr<SmallVector<Value>>
|
||||
shapeFunctionArgsBuilder(OpBuilder &b, Location loc,
|
||||
ValueRange originalOperands, func::FuncOp shapeFunc) {
|
||||
// Massage the op operands to match the shape function signature.
|
||||
// The shape function generally takes the same operands as the op, with a few
|
||||
// systematic modifications, such as replacing tensors with their shapes.
|
||||
SmallVector<Value> shapeFunctionArgs;
|
||||
SmallVector<Value> shapeFuncArgs;
|
||||
for (auto operandAndDesiredType :
|
||||
llvm::zip(originalOperands, shapeFunction.getArgumentTypes())) {
|
||||
llvm::zip(originalOperands, shapeFunc.getArgumentTypes())) {
|
||||
Value operand;
|
||||
Type desiredType;
|
||||
std::tie(operand, desiredType) = operandAndDesiredType;
|
||||
Value shapeFunctionArg =
|
||||
adjustShapeFunctionArg(operand, desiredType, b, loc);
|
||||
if (!shapeFunctionArg)
|
||||
FailureOr<Value> shapeFuncArg = adjustFunctionArg(
|
||||
b, loc, operand, desiredType,
|
||||
[](OpBuilder &b, Location loc, Value operand,
|
||||
Type desiredType) -> Value {
|
||||
// The shape library functions have tensor operands replaced with
|
||||
// `!torch.list<int>` types for the shape. Get the sizes.
|
||||
auto desiredListType = desiredType.dyn_cast<Torch::ListType>();
|
||||
if (!desiredListType)
|
||||
return operand;
|
||||
if (operand.getType().isa<Torch::BaseTensorType>() &&
|
||||
desiredListType.getContainedType().isa<Torch::IntType>()) {
|
||||
return b.create<AtenSizeOp>(loc, desiredType, operand);
|
||||
}
|
||||
return operand;
|
||||
});
|
||||
if (failed(shapeFuncArg))
|
||||
return failure();
|
||||
shapeFunctionArgs.push_back(shapeFunctionArg);
|
||||
shapeFuncArgs.push_back(*shapeFuncArg);
|
||||
}
|
||||
|
||||
// Create the call to the shape function!
|
||||
auto call =
|
||||
b.create<mlir::func::CallOp>(loc, shapeFunction, shapeFunctionArgs);
|
||||
|
||||
// Python models multiple results with a tuple, so we need to unpack it
|
||||
// if the op has multiple results.
|
||||
SmallVector<Value> unpackedResults;
|
||||
assert(call.getNumResults() == 1 &&
|
||||
"Multiple results are packed in a tuple in Python!");
|
||||
Value result = call.getResult(0);
|
||||
if (auto tupleType = result.getType().dyn_cast<Torch::TupleType>()) {
|
||||
auto unpack =
|
||||
b.create<PrimTupleUnpackOp>(loc, tupleType.getContainedTypes(), result);
|
||||
llvm::append_range(unpackedResults, unpack.getResults());
|
||||
} else {
|
||||
unpackedResults.push_back(result);
|
||||
}
|
||||
|
||||
// Terminate the region.
|
||||
b.create<ShapeCalculateYieldShapesOp>(loc, unpackedResults);
|
||||
return success();
|
||||
return shapeFuncArgs;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -222,74 +62,23 @@ class ReifyShapeCalculationsPass
|
|||
ModuleOp module = getOperation();
|
||||
|
||||
// TODO: Find a way to not have to parse this every time.
|
||||
// The shape library is O(#ops we know about), and this pass should be
|
||||
// The library is O(#ops we know about), and this pass should be
|
||||
// O(#ops in the program) ideally.
|
||||
auto shapeLibrary = parseSourceString<ModuleOp>(getShapeLibrary(), context);
|
||||
OwningOpRef<ModuleOp> library =
|
||||
parseSourceString<ModuleOp>(getAbstractInterpLibrary(), context);
|
||||
|
||||
// Walk all the operations, and if we have a shape function, wrap the op
|
||||
// in a `torch.shape.calculate` op.
|
||||
SmallVector<std::string> neededShapeFunctions;
|
||||
bool hadError = false;
|
||||
module.walk([&](Operation *op) {
|
||||
Location loc = op->getLoc();
|
||||
auto name = op->getName().stripDialect();
|
||||
// For value-semantic variant ops, i.e. valsem-ops (ops that are
|
||||
// mechanically consistent with existing torch conventions of in-place vs.
|
||||
// out-of-place (value-semantic) variants), remove the prefix when
|
||||
// looking them up in the shape library.
|
||||
if (name.startswith("valsem."))
|
||||
name = name.drop_front(strlen("valsem."));
|
||||
auto shapeFunctionName = ("__torch_mlir_shape_fn." + Twine(name)).str();
|
||||
auto shapeFunction =
|
||||
shapeLibrary->lookupSymbol<func::FuncOp>(shapeFunctionName);
|
||||
if (!shapeFunction)
|
||||
return;
|
||||
neededShapeFunctions.push_back(shapeFunctionName);
|
||||
auto shapeCalculate =
|
||||
OpBuilder(op).create<ShapeCalculateOp>(loc, op->getResultTypes());
|
||||
op->replaceAllUsesWith(shapeCalculate);
|
||||
{
|
||||
// Move the op into the body of the `torch.shape.calculate` op and yield
|
||||
// its results.
|
||||
OpBuilder b(context);
|
||||
Block *block = b.createBlock(&shapeCalculate.getBody());
|
||||
op->moveBefore(block, block->end());
|
||||
b.setInsertionPointAfter(op);
|
||||
b.create<ShapeCalculateYieldOp>(loc, op->getResults());
|
||||
}
|
||||
if (failed(populateShapeCalculationRegion(
|
||||
shapeCalculate, op->getOperands(), shapeFunction))) {
|
||||
hadError = true;
|
||||
return;
|
||||
}
|
||||
SmallVector<std::string> functionsNeeded;
|
||||
WalkResult walkResult = module.walk([&](Operation *op) -> WalkResult {
|
||||
return wrapWithCalculateOpIfLibraryFunctionAvailable(
|
||||
op, *library, LibraryFunctionKind::ShapeFunction, functionsNeeded,
|
||||
shapeFunctionArgsBuilder);
|
||||
});
|
||||
|
||||
if (hadError)
|
||||
if (walkResult.wasInterrupted())
|
||||
return signalPassFailure();
|
||||
|
||||
// Import just the functions we need. This includes transitive callees,
|
||||
// so we use a worklist algorithm.
|
||||
llvm::StringSet<> importedFunctions;
|
||||
SmallVector<std::string> worklist;
|
||||
llvm::append_range(worklist, neededShapeFunctions);
|
||||
while (!worklist.empty()) {
|
||||
auto symName = worklist.pop_back_val();
|
||||
if (importedFunctions.count(symName))
|
||||
continue;
|
||||
auto func = shapeLibrary->lookupSymbol<mlir::func::FuncOp>(symName);
|
||||
assert(func && "broken shape library");
|
||||
// Move the shape function from the library to the module this pass
|
||||
// is running on. (this mutates the library, but we re-parse it each time
|
||||
// so this is safe to do).
|
||||
func->moveBefore(&module.getBody()->front());
|
||||
// Set the visibility to private so that the shape functions go away
|
||||
// nicely after we are done with them.
|
||||
func.setVisibility(SymbolTable::Visibility::Private);
|
||||
// Continue the DFS.
|
||||
importedFunctions.insert(symName);
|
||||
func.walk(
|
||||
[&](func::CallOp op) { worklist.push_back(op.getCallee().str()); });
|
||||
}
|
||||
importLibraryFunctions(module, *library, std::move(functionsNeeded));
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SimplifyAbstractInterpCalculationsUtils.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
LogicalResult Torch::updateCalculateOpResultTypes(Operation *calculateOp,
|
||||
int resultNum,
|
||||
Type newResultType,
|
||||
PatternRewriter &rewriter) {
|
||||
Location loc = calculateOp->getLoc();
|
||||
auto result = calculateOp->getResult(resultNum);
|
||||
Type originalResultType = result.getType();
|
||||
Type updatedType;
|
||||
if (auto originalBaseTensorType =
|
||||
originalResultType.template dyn_cast<BaseTensorType>()) {
|
||||
// If we didn't get any new information, there is nothing left for us to do.
|
||||
updatedType = meetTensorTypes(originalBaseTensorType,
|
||||
newResultType.cast<BaseTensorType>());
|
||||
if (!updatedType || updatedType == originalBaseTensorType)
|
||||
return rewriter.notifyMatchFailure(
|
||||
calculateOp, "New type information does not refine old type");
|
||||
} else if (auto originalResultType =
|
||||
result.getType().template dyn_cast<Torch::NumberType>()) {
|
||||
if (!newResultType.isa<Torch::FloatType, Torch::IntType>()) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
calculateOp,
|
||||
"Refinement of `NumberType` must be a `FloatType` or `IntType`");
|
||||
}
|
||||
updatedType = newResultType;
|
||||
} else {
|
||||
return rewriter.notifyMatchFailure(calculateOp,
|
||||
"Unimplemented: Expected result type to "
|
||||
"be `BaseTensorType` or `NumberType`");
|
||||
}
|
||||
|
||||
// Update all the uses of the result type to the new type, if possible. Insert
|
||||
// a TensorStaticInfoCastOp for any users that might require the exact
|
||||
// previous type.
|
||||
Value originalTypedValue;
|
||||
for (OpOperand &use : llvm::make_early_inc_range(result.getUses())) {
|
||||
if (use.getOwner()
|
||||
->hasTrait<mlir::torch::Torch::OpTrait::AllowsTypeRefinement>()) {
|
||||
continue;
|
||||
}
|
||||
if (!originalTypedValue) {
|
||||
rewriter.setInsertionPointAfter(calculateOp);
|
||||
if (originalResultType.isa<BaseTensorType>()) {
|
||||
originalTypedValue = rewriter.create<TensorStaticInfoCastOp>(
|
||||
loc, originalResultType, result);
|
||||
} else if (originalResultType.isa<Torch::NumberType>()) {
|
||||
originalTypedValue =
|
||||
rewriter.create<DerefineOp>(loc, originalResultType, result);
|
||||
} else {
|
||||
return rewriter.notifyMatchFailure(
|
||||
calculateOp, "Unimplemented: Expected result type to "
|
||||
"be `BaseTensorType` or `NumberType`");
|
||||
}
|
||||
}
|
||||
use.set(originalTypedValue);
|
||||
}
|
||||
result.setType(updatedType);
|
||||
|
||||
// Update the value yielded from the body to match the new result type. If we
|
||||
// can refine the def in place, do that, otherwise insert a
|
||||
// TensorStaticInfoCastOp.
|
||||
Operation *yieldValues = calculateOp->getRegion(0).front().getTerminator();
|
||||
OpOperand &use = yieldValues->getOpOperand(resultNum);
|
||||
Value def = use.get();
|
||||
Value newYieldedValue;
|
||||
if (def.isa<OpResult>() &&
|
||||
def.cast<OpResult>()
|
||||
.getDefiningOp()
|
||||
->hasTrait<mlir::torch::Torch::OpTrait::AllowsTypeRefinement>()) {
|
||||
newYieldedValue = def;
|
||||
} else {
|
||||
rewriter.setInsertionPoint(yieldValues);
|
||||
if (updatedType.isa<BaseTensorType>()) {
|
||||
newYieldedValue =
|
||||
rewriter.create<TensorStaticInfoCastOp>(loc, updatedType, def);
|
||||
} else {
|
||||
newYieldedValue =
|
||||
rewriter.create<PrimUncheckedCastOp>(loc, updatedType, def);
|
||||
}
|
||||
}
|
||||
use.set(newYieldedValue);
|
||||
newYieldedValue.setType(updatedType);
|
||||
|
||||
return success();
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef TORCHMLIR_DIALECT_TORCH_TRANSFORMS_SIMPLIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
||||
#define TORCHMLIR_DIALECT_TORCH_TRANSFORMS_SIMPLIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
||||
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace torch {
|
||||
namespace Torch {
|
||||
|
||||
// Updates the type of result `resultNum` of both `calculateOp` and the torch op
|
||||
// being wrapped by `calculateOp` to the type `newResultType`.
|
||||
LogicalResult updateCalculateOpResultTypes(Operation *calculateOp,
|
||||
int resultNum, Type newResultType,
|
||||
PatternRewriter &rewriter);
|
||||
|
||||
} // namespace Torch
|
||||
} // namespace torch
|
||||
} // namespace mlir
|
||||
|
||||
#endif // TORCHMLIR_DIALECT_TORCH_TRANSFORMS_SIMPLIFY_ABSTRACT_INTERP_CALCULATIONS_UTILS_H
|
|
@ -0,0 +1,209 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
// Also available under a BSD-style license. See LICENSE.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "SimplifyAbstractInterpCalculationsUtils.h"
|
||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
||||
#include "torch-mlir/Dialect/Torch/Utils/TorchUpstream.h"
|
||||
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
using namespace mlir::torch::Torch;
|
||||
|
||||
static LogicalResult refineDtypeCalculateResult(DtypeCalculateOp op,
|
||||
int resultNum,
|
||||
PatternRewriter &rewriter) {
|
||||
auto yieldDtypes = op.getCalculation().front().getTerminator();
|
||||
auto dtype = yieldDtypes->getOperand(resultNum);
|
||||
auto result = op->getResult(resultNum);
|
||||
|
||||
int64_t dtypeInt;
|
||||
if (!matchPattern(dtype, m_TorchConstantInt(&dtypeInt)))
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected result from the DtypeCalculateOp calculation to be a "
|
||||
"constant int");
|
||||
auto dtypeScalarType = static_cast<torch_upstream::ScalarType>(dtypeInt);
|
||||
|
||||
// Calculate the updated type incorporating the new information.
|
||||
Type impliedTypeFromDtype;
|
||||
if (result.getType().isa<Torch::NumberType>()) {
|
||||
FailureOr<Type> torchType =
|
||||
getTorchTypeForScalarType(op->getContext(), dtypeScalarType);
|
||||
if (failed(torchType)) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Failed to convert result dtype to `Torch::FloatType` or "
|
||||
"`Torch::IntType`");
|
||||
}
|
||||
impliedTypeFromDtype = *torchType;
|
||||
} else if (auto originalResultType =
|
||||
result.getType().dyn_cast<BaseTensorType>()) {
|
||||
impliedTypeFromDtype =
|
||||
originalResultType.cast<BaseTensorType>().getWithSizesAndDtype(
|
||||
originalResultType.getOptionalSizes(),
|
||||
getTypeForScalarType(op->getContext(), dtypeScalarType));
|
||||
} else {
|
||||
return rewriter.notifyMatchFailure(op,
|
||||
"Unimplemented: Expected result type to "
|
||||
"be `BaseTensorType` or `NumberType`");
|
||||
}
|
||||
return updateCalculateOpResultTypes(op, resultNum, impliedTypeFromDtype,
|
||||
rewriter);
|
||||
}
|
||||
|
||||
namespace {
|
||||
// This pattern propagates information out of the dtype calculation region and
|
||||
// into the DtypeCalculateOp result types.
|
||||
class RefineDtypeCalculateOp : public OpRewritePattern<DtypeCalculateOp> {
|
||||
public:
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
LogicalResult matchAndRewrite(DtypeCalculateOp op,
|
||||
PatternRewriter &rewriter) const override {
|
||||
LogicalResult result = failure();
|
||||
for (int i = 0, e = op->getNumResults(); i != e; i++) {
|
||||
if (succeeded(refineDtypeCalculateResult(op, i, rewriter)))
|
||||
result = success();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace {
|
||||
class DecomposePromoteDtypesOp : public OpRewritePattern<PromoteDtypesOp> {
|
||||
public:
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
LogicalResult matchAndRewrite(PromoteDtypesOp op,
|
||||
PatternRewriter &rewriter) const override {
|
||||
SmallVector<Optional<int64_t>> ranks;
|
||||
SmallVector<int64_t> dtypes;
|
||||
if (!matchPattern(op.getRanks(), m_TorchListOfOptionalConstantInts(ranks))) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `ranks` to be a list of optional constant ints");
|
||||
}
|
||||
|
||||
if (!matchPattern(op.getDtypes(), m_TorchListOfConstantInts(dtypes))) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `dtypes` to be a list of constant ints");
|
||||
}
|
||||
|
||||
if (ranks.empty() || dtypes.empty()) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "`ranks` list and `dtypes` list must be non-empty");
|
||||
}
|
||||
|
||||
if (ranks.size() != dtypes.size()) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "`ranks` list and `dtypes` list must have the same size");
|
||||
}
|
||||
|
||||
torch_upstream::ResultTypeState state{};
|
||||
for (auto ranksAndDtypes : llvm::zip(ranks, dtypes)) {
|
||||
Optional<int64_t> rank;
|
||||
int64_t dtype;
|
||||
std::tie(rank, dtype) = ranksAndDtypes;
|
||||
auto scalarType = static_cast<torch_upstream::ScalarType>(dtype);
|
||||
|
||||
bool isScalarOnlyOp = llvm::all_of(
|
||||
ranks, [](Optional<int64_t> rank) { return !rank.has_value(); });
|
||||
|
||||
if (!rank.has_value()) {
|
||||
// If `rank` does not have a value, then we are dealing with a scalar
|
||||
// input. For the type promotion, the behavior of a scalar argument is
|
||||
// dependent on whether the op is performing an operation with only
|
||||
// scalars (such as AtenAddOp) or with scalars and tensors (such as
|
||||
// AtenAddScalarOp). Therefore, we convert back to the original torch
|
||||
// type of the scalar first, and then determine the right scalar type to
|
||||
// use for promotion based on whether the op involves only scalars or
|
||||
// scalars and tensors.
|
||||
FailureOr<Type> torchType =
|
||||
getTorchTypeForScalarType(op->getContext(), scalarType);
|
||||
if (failed(torchType)) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Dtypes for arguments scalars must be convertible to "
|
||||
"`Torch::FloatType` or `Torch::IntType`");
|
||||
}
|
||||
Type builtinType = isScalarOnlyOp
|
||||
? getBuiltInTypeForTorchScalar(*torchType)
|
||||
: getDefaultDtypeForTorchScalar(*torchType);
|
||||
scalarType = getScalarTypeForType(builtinType);
|
||||
state.wrappedResult =
|
||||
promote_skip_undefined(state.wrappedResult, scalarType);
|
||||
} else if (rank.value() == 0) {
|
||||
state.zeroResult = promote_skip_undefined(state.zeroResult, scalarType);
|
||||
} else if (rank.value() > 0) {
|
||||
state.dimResult = promote_skip_undefined(state.dimResult, scalarType);
|
||||
} else {
|
||||
return rewriter.notifyMatchFailure(op, "Rank should not be negative");
|
||||
}
|
||||
}
|
||||
|
||||
auto resultType = static_cast<int64_t>(result_type(state));
|
||||
rewriter.replaceOpWithNewOp<ConstantIntOp>(
|
||||
op, rewriter.getI64IntegerAttr(resultType));
|
||||
return success();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace {
|
||||
class RefineNumToTensorScalarOpType
|
||||
: public OpRewritePattern<PrimNumToTensorScalarOp> {
|
||||
public:
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
LogicalResult matchAndRewrite(PrimNumToTensorScalarOp op,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto originalResultType = op.getResult().getType().cast<BaseTensorType>();
|
||||
if (originalResultType.hasDtype())
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "`PrimNumToTensorScalarOp` already has a dtype");
|
||||
|
||||
Type inputType = getBuiltInTypeForTorchScalar(op.getA().getType());
|
||||
auto impliedTypeFromInputType =
|
||||
originalResultType.cast<BaseTensorType>()
|
||||
.getWithSizesAndDtype(originalResultType.getOptionalSizes(),
|
||||
inputType)
|
||||
.cast<BaseTensorType>();
|
||||
|
||||
op.getResult().setType(impliedTypeFromInputType);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace {
|
||||
class SimplifyDtypeCalculationsPass
|
||||
: public SimplifyDtypeCalculationsBase<SimplifyDtypeCalculationsPass> {
|
||||
void runOnOperation() override {
|
||||
MLIRContext *context = &getContext();
|
||||
|
||||
RewritePatternSet patterns(context);
|
||||
patterns.insert<RefineDtypeCalculateOp>(context);
|
||||
patterns.insert<DecomposePromoteDtypesOp>(context);
|
||||
patterns.insert<RefineNumToTensorScalarOpType>(context);
|
||||
|
||||
// TODO: Debug visitation order to make this more efficient.
|
||||
// A single linear scan should suffice.
|
||||
GreedyRewriteConfig config;
|
||||
config.useTopDownTraversal = true;
|
||||
config.maxIterations = GreedyRewriteConfig::kNoIterationLimit;
|
||||
if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns),
|
||||
config))) {
|
||||
return signalPassFailure();
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<OperationPass<func::FuncOp>>
|
||||
mlir::torch::Torch::createSimplifyDtypeCalculationsPass() {
|
||||
return std::make_unique<SimplifyDtypeCalculationsPass>();
|
||||
}
|
|
@ -9,20 +9,11 @@
|
|||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "SimplifyAbstractInterpCalculationsUtils.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/Parser/Parser.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||
#include "mlir/Transforms/InliningUtils.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchDialect.h"
|
||||
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
||||
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
||||
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::torch;
|
||||
|
@ -40,10 +31,12 @@ public:
|
|||
Location loc = op->getLoc();
|
||||
MLIRContext *context = op->getContext();
|
||||
if (!op.isForLike())
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(op, "Loop is not for-like");
|
||||
int64_t maxTripCount;
|
||||
if (!matchPattern(op.getMaxTripCount(), m_TorchConstantInt(&maxTripCount)))
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `maxTripCount` to be a constant int");
|
||||
;
|
||||
SmallVector<Value> indices;
|
||||
for (int64_t i = 0; i < maxTripCount; i++) {
|
||||
// TODO: Add convenience builder.
|
||||
|
@ -149,7 +142,8 @@ public:
|
|||
for (Operation *user : usersToInterpret) {
|
||||
if (auto append = dyn_cast<AtenAppendTOp>(user)) {
|
||||
if (!append.use_empty())
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `AtenAppendTOp` to not have users");
|
||||
if (append.getSelf() == op) {
|
||||
runningList.push_back(append.getEl());
|
||||
generatedNewLiteral = true;
|
||||
|
@ -159,13 +153,16 @@ public:
|
|||
}
|
||||
if (auto insert = dyn_cast<AtenInsertTOp>(user)) {
|
||||
if (!insert.use_empty())
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `AtenInsertTOp` to not have users");
|
||||
int64_t index;
|
||||
if (!matchPattern(insert.getIdx(), m_TorchConstantInt(&index)))
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `idx` of `AtenInsertTOp` to be a constant int");
|
||||
// The index might be statically out of bounds.
|
||||
if (index < 0 || index > static_cast<int64_t>(runningList.size()))
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Index in `AtenInsertTOp` is out of bounds");
|
||||
if (insert.getSelf() == op) {
|
||||
runningList.insert(runningList.begin() + index, insert.getEl());
|
||||
generatedNewLiteral = true;
|
||||
|
@ -175,13 +172,15 @@ public:
|
|||
}
|
||||
if (auto setItem = dyn_cast<Aten_SetItemTOp>(user)) {
|
||||
if (!setItem.use_empty())
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected `Aten_SetItemTOp` to not have users");
|
||||
llvm::Optional<int64_t> indexOpt =
|
||||
matchLegalConstantIndexIntoListOfSize(setItem.getIdx(),
|
||||
runningList.size());
|
||||
// The index might be statically out of bounds.
|
||||
if (!indexOpt)
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Index in `Aten_SetItemTOp` is out of bounds");
|
||||
if (setItem.getL() == op) {
|
||||
runningList[*indexOpt] = setItem.getEl();
|
||||
generatedNewLiteral = true;
|
||||
|
@ -196,7 +195,7 @@ public:
|
|||
}
|
||||
|
||||
if (!generatedNewLiteral)
|
||||
return failure();
|
||||
return rewriter.notifyMatchFailure(op, "No new literal created");
|
||||
|
||||
// Rewrite all users to use the appropriate list literals.
|
||||
Value latestLiteral = rewriter.create<PrimListConstructOp>(
|
||||
|
@ -285,11 +284,10 @@ public:
|
|||
};
|
||||
} // namespace
|
||||
|
||||
static void refineShapeCalculateResult(ShapeCalculateOp op, int resultNum,
|
||||
PatternRewriter &rewriter,
|
||||
bool &madeChange) {
|
||||
auto yieldValues = op.getBody().front().getTerminator();
|
||||
auto yieldShapes = op.getShapeCalculation().front().getTerminator();
|
||||
static LogicalResult refineShapeCalculateResult(ShapeCalculateOp op,
|
||||
int resultNum,
|
||||
PatternRewriter &rewriter) {
|
||||
auto yieldShapes = op.getCalculation().front().getTerminator();
|
||||
auto shape = yieldShapes->getOperand(resultNum);
|
||||
auto result = op->getResult(resultNum);
|
||||
|
||||
|
@ -298,7 +296,9 @@ static void refineShapeCalculateResult(ShapeCalculateOp op, int resultNum,
|
|||
// much as possible to literals.
|
||||
auto listConstruct = shape.getDefiningOp<PrimListConstructOp>();
|
||||
if (!listConstruct)
|
||||
return;
|
||||
return rewriter.notifyMatchFailure(
|
||||
op, "Expected result from ShapeCalculateOp calculation to be a "
|
||||
"`PrimListConstructOp`");
|
||||
llvm::BitVector clobberedElements(listConstruct->getNumOperands());
|
||||
// Analyze the users to determine if we can refine the shape.
|
||||
for (Operation *user : listConstruct->getUsers()) {
|
||||
|
@ -320,7 +320,7 @@ static void refineShapeCalculateResult(ShapeCalculateOp op, int resultNum,
|
|||
continue;
|
||||
}
|
||||
// An unhandled op! We can't make any assumptions about the shape.
|
||||
return;
|
||||
return rewriter.notifyMatchFailure(op, "Unhandled op that mutates lists");
|
||||
}
|
||||
|
||||
// Construct the list of sizes implied by the yielded shape.
|
||||
|
@ -334,55 +334,15 @@ static void refineShapeCalculateResult(ShapeCalculateOp op, int resultNum,
|
|||
sizes.push_back(kUnknownSize);
|
||||
}
|
||||
|
||||
// Calculate the updated type incorporating the new shape information.
|
||||
Type originalResultType = result.getType();
|
||||
auto originalResultType = result.getType().cast<BaseTensorType>();
|
||||
auto impliedTypesFromShape =
|
||||
originalResultType.cast<BaseTensorType>().getWithSizesAndDtype(
|
||||
makeArrayRef(sizes), nullptr);
|
||||
auto updatedType =
|
||||
meetTensorTypes(originalResultType.cast<BaseTensorType>(),
|
||||
impliedTypesFromShape.cast<BaseTensorType>());
|
||||
// If we didn't get any new information, there is nothing left for us to do.
|
||||
if (!updatedType || updatedType == originalResultType)
|
||||
return;
|
||||
originalResultType.cast<BaseTensorType>()
|
||||
.getWithSizesAndDtype(makeArrayRef(sizes),
|
||||
originalResultType.getOptionalDtype())
|
||||
.cast<BaseTensorType>();
|
||||
|
||||
// Update all the uses of the result type to the new type, if possible. Insert
|
||||
// a TensorStaticInfoCastOp for any users that might require the exact
|
||||
// previous type.
|
||||
Value originalTypedValue;
|
||||
for (OpOperand &use : llvm::make_early_inc_range(result.getUses())) {
|
||||
if (use.getOwner()
|
||||
->hasTrait<mlir::torch::Torch::OpTrait::AllowsTypeRefinement>()) {
|
||||
continue;
|
||||
}
|
||||
if (!originalTypedValue) {
|
||||
rewriter.setInsertionPointAfter(op);
|
||||
originalTypedValue = rewriter.create<TensorStaticInfoCastOp>(
|
||||
op->getLoc(), originalResultType, result);
|
||||
}
|
||||
use.set(originalTypedValue);
|
||||
}
|
||||
result.setType(updatedType);
|
||||
madeChange = true;
|
||||
|
||||
// Update the value yielded from the body to match the new result type. If we
|
||||
// can refine the def in place, do that, otherwise insert a
|
||||
// TensorStaticInfoCastOp.
|
||||
OpOperand &use = op.getBody().front().getTerminator()->getOpOperand(resultNum);
|
||||
Value def = use.get();
|
||||
Value newYieldedValue;
|
||||
if (def.isa<OpResult>() &&
|
||||
def.cast<OpResult>()
|
||||
.getDefiningOp()
|
||||
->hasTrait<mlir::torch::Torch::OpTrait::AllowsTypeRefinement>()) {
|
||||
newYieldedValue = def;
|
||||
} else {
|
||||
rewriter.setInsertionPoint(yieldValues);
|
||||
newYieldedValue =
|
||||
rewriter.create<TensorStaticInfoCastOp>(op->getLoc(), updatedType, def);
|
||||
}
|
||||
use.set(newYieldedValue);
|
||||
newYieldedValue.setType(updatedType);
|
||||
return updateCalculateOpResultTypes(op, resultNum, impliedTypesFromShape,
|
||||
rewriter);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -393,10 +353,11 @@ public:
|
|||
using OpRewritePattern::OpRewritePattern;
|
||||
LogicalResult matchAndRewrite(ShapeCalculateOp op,
|
||||
PatternRewriter &rewriter) const override {
|
||||
bool madeChange = false;
|
||||
LogicalResult result = failure();
|
||||
for (int i = 0, e = op->getNumResults(); i != e; i++)
|
||||
refineShapeCalculateResult(op, i, rewriter, madeChange);
|
||||
return success(madeChange);
|
||||
if (succeeded(refineShapeCalculateResult(op, i, rewriter)))
|
||||
result = success();
|
||||
return result;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
|
|
@ -100,19 +100,46 @@ Type Torch::getTypeForScalarType(
|
|||
}
|
||||
}
|
||||
|
||||
Type Torch::getTorchTypeForScalarType(MLIRContext *context,
|
||||
torch_upstream::ScalarType dtypeInt) {
|
||||
FailureOr<Type>
|
||||
Torch::getTorchTypeForScalarType(MLIRContext *context,
|
||||
torch_upstream::ScalarType dtypeInt) {
|
||||
switch (dtypeInt) {
|
||||
case torch_upstream::ScalarType::Double:
|
||||
return Torch::FloatType::get(context);
|
||||
case torch_upstream::ScalarType::Long:
|
||||
return Torch::IntType::get(context);
|
||||
default:
|
||||
llvm::report_fatal_error(
|
||||
"Unsupported scalar type to Torch type conversion");
|
||||
return failure();
|
||||
}
|
||||
}
|
||||
|
||||
Type Torch::getDefaultDtypeForTorchScalar(Type type) {
|
||||
MLIRContext *context = type.getContext();
|
||||
if (type.isa<Torch::FloatType>()) {
|
||||
// For now, use float32 which is the initial default dtype returned by
|
||||
// `torch.get_default_dtype`.
|
||||
return Float32Type::get(context);
|
||||
}
|
||||
if (type.isa<Torch::IntType>())
|
||||
return IntegerType::get(context, 64, IntegerType::Signed);
|
||||
if (type.isa<Torch::BoolType>())
|
||||
return IntegerType::get(context, 1);
|
||||
llvm_unreachable(
|
||||
"getDefaultDtypeForTorchScalar called on an unsupported type");
|
||||
}
|
||||
|
||||
Type Torch::getBuiltInTypeForTorchScalar(Type type) {
|
||||
MLIRContext *context = type.getContext();
|
||||
if (type.isa<Torch::FloatType>())
|
||||
return Float64Type::get(context);
|
||||
if (type.isa<Torch::IntType>())
|
||||
return IntegerType::get(context, 64, IntegerType::Signed);
|
||||
if (type.isa<Torch::BoolType>())
|
||||
return IntegerType::get(context, 1);
|
||||
llvm_unreachable(
|
||||
"getBuiltInTypeForTorchScalar called on an unsupported type");
|
||||
}
|
||||
|
||||
Value Torch::getDtypeIntValueForType(PatternRewriter &rewriter, Location loc,
|
||||
Type dtype) {
|
||||
int intType = (int)getScalarTypeForType(dtype);
|
||||
|
|
|
@ -104,7 +104,8 @@ endif()
|
|||
|
||||
################################################################################
|
||||
# Custom op example
|
||||
# Required for running the update_torch_ods.sh and update_shape_lib.sh scripts.
|
||||
# Required for running the update_torch_ods.sh and update_abstract_interp_lib.sh
|
||||
# scripts.
|
||||
################################################################################
|
||||
|
||||
# add_subdirectory(torch_mlir/_torch_mlir_custom_op_example)
|
||||
|
|
|
@ -5,12 +5,12 @@ If you're reading this, you're likely looking to create or support a third-party
|
|||
This isn't much different than [adding a new PyTorch op to torch-mlir](https://github.com/llvm/torch-mlir/wiki/Torch-ops-E2E-implementation).
|
||||
You'll still go through the exact same process, with just a small change:
|
||||
|
||||
- Before running `update_torch_ods.sh` or `update_shape_lib.sh`, you'll want to set the `TORCH_MLIR_EXT_PYTHONPATH` to point to wherever your extension lives and the `TORCH_MLIR_EXT_MODULES` to the name of the python module.
|
||||
- Before running `update_torch_ods.sh` or `update_abstract_interp_lib.sh`, you'll want to set the `TORCH_MLIR_EXT_PYTHONPATH` to point to wherever your extension lives and the `TORCH_MLIR_EXT_MODULES` to the name of the python module.
|
||||
|
||||
For instance, let's say you've written a python package called `my_torch_ops`.
|
||||
If `my_torch_ops` lives in `/example/subdirectory/`, then you'll want to set `TORCH_MLIR_EXT_PYTHONPATH=/example/subdirectory` and `TORCH_MLIR_EXT_MODULES=my_torch_ops`.
|
||||
If you've installed your package (with `pip`, for instance), then you'll only need to set `TORCH_MLIR_EXT_MODULES=my_torch_ops`.
|
||||
Note that the `update_torch_ods.sh` and `update_shape_lib.sh` scripts do not use the `PYTHONPATH` environment variable in your current shell.
|
||||
Note that the `update_torch_ods.sh` and `update_abstract_interp_lib.sh` scripts do not use the `PYTHONPATH` environment variable in your current shell.
|
||||
This is on purpose, but it means that you need to set `TORCH_MLIR_EXT_PYTHONPATH` to include either your package's path or the paths already set in your shell's `PYTHONPATH` variable.
|
||||
If you have more than one PyTorch extension, you can add them all by including each path in `TORCH_MLIR_EXT_PYTHONPATH` separated by colons (`:`) and each module in `TORCH_MLIR_EXT_MODULES` separated by commas (`,`).
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -0,0 +1,189 @@
|
|||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Also available under a BSD-style license. See LICENSE.
|
||||
|
||||
import inspect
|
||||
import re
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import torch
|
||||
|
||||
from torch_mlir.dialects.torch.importer.jit_ir import ModuleBuilder
|
||||
from torch_mlir.passmanager import PassManager
|
||||
|
||||
from .registry import Registry
|
||||
|
||||
def get_dtype_of_scalar(scalar: Union[int, float]) -> int:
|
||||
# This is hacky. `NumToTensor` is the only PyTorch op for scalars
|
||||
# that when `jit.script`ed converts a float scalar to a tensor
|
||||
# with dtype that corresponds to Python's `float`.
|
||||
#
|
||||
# See definition of `NumToTensor`: https://github.com/pytorch/pytorch/blob/c09929659ce8ba2f1b7b2f6e50084ccbf854d44b/torch/csrc/jit/ir/ir.cpp#L1850
|
||||
# Definition of `fromNumberType` used by `NumToTensor` to
|
||||
# calculate dtype: https://github.com/pytorch/pytorch/blob/c09929659ce8ba2f1b7b2f6e50084ccbf854d44b/aten/src/ATen/core/jit_type.h#L1679
|
||||
#
|
||||
# Note that doing something like
|
||||
# `torch.tensor(scalar).to(type(scalar)).dtype` does not work because
|
||||
# `torch.tensor` needs to know the type of the input at compile time
|
||||
# and there is no `jit.script` support for Python's `type`.
|
||||
#
|
||||
# TODO: A better way to handle this would be to add support for
|
||||
# `isinstance` in torch-mlir, which requires adding a new torch dialect
|
||||
# op.
|
||||
return torch.ops.prim.NumToTensor(scalar).dtype
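# A minimal sketch (not part of the library) of what the helper above returns
# when run eagerly: a Python `int` maps to `torch.int64` and a Python `float`
# maps to `torch.float64`, mirroring `NumToTensor`.
def _demo_get_dtype_of_scalar() -> None:
    assert get_dtype_of_scalar(3) == torch.int64
    assert get_dtype_of_scalar(3.0) == torch.float64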
|
||||
|
||||
# When we import into torch-mlir, only the calls to
|
||||
# `__torch_mlir_internal_promote_dtypes` are used to generate the
|
||||
# `torch.promote_dtypes` ops. Therefore, to avoid generating extra
|
||||
# MLIR code in the library, all calls made inside
|
||||
# `__torch_mlir_internal_promote_dtypes` are `jit.ignore`d.
|
||||
@torch.jit.ignore
|
||||
def _get_scalar_with_dtype(dtype: torch.dtype) -> Union[int, float]:
|
||||
if dtype == torch.int64:
|
||||
return 0
|
||||
elif dtype == torch.float64:
|
||||
return 0.0
|
||||
else:
|
||||
raise ValueError(f"Unhandled dtype: {dtype}")
|
||||
|
||||
@torch.jit.ignore
|
||||
def _promote_scalar_tensor(scalar_dtype: torch.dtype, tensor_rank: int,
|
||||
tensor_dtype: torch.dtype) -> torch.dtype:
|
||||
scalar = _get_scalar_with_dtype(scalar_dtype)
|
||||
tensor = torch.rand([1] * tensor_rank).to(tensor_dtype)
|
||||
return torch.result_type(scalar, tensor)
|
||||
|
||||
@torch.jit.ignore
|
||||
def _promote_tensor_tensor(lhs_rank: int, lhs_dtype: torch.dtype,
|
||||
rhs_rank: int, rhs_dtype: torch.dtype) -> torch.dtype:
|
||||
lhs_tensor = torch.rand([1] * lhs_rank).to(lhs_dtype)
|
||||
rhs_tensor = torch.rand([1] * rhs_rank).to(rhs_dtype)
|
||||
return torch.result_type(lhs_tensor, rhs_tensor)
|
||||
|
||||
@torch.jit.ignore
|
||||
def _promote_scalar_scalar(lhs_dtype: torch.dtype,
|
||||
rhs_dtype: torch.dtype) -> torch.dtype:
|
||||
# When `torch.result_type` is used on two scalars, the result
|
||||
# dtype is the dtype one would expect for an op with signature
|
||||
# (Scalar, Scalar) -> (Tensor). However, once a module gets
|
||||
# jit.scripted, all math ops that work on scalars becomes
|
||||
# (Scalar, Scalar) -> (Scalar) ops. So to get the right result
|
||||
# dtype, we use the tensor-tensor promotion rules.
|
||||
return _promote_tensor_tensor(0, lhs_dtype, 0, rhs_dtype)
|
||||
|
||||
def promote_dtypes(ranks: List[Optional[int]],
|
||||
dtypes: List[torch.dtype]) -> torch.dtype:
|
||||
"""Apply PyTorch dtype promotion rules and return the result type.
|
||||
"""
|
||||
return __torch_mlir_internal_promote_dtypes(ranks, dtypes)
|
||||
|
||||
def __torch_mlir_internal_promote_dtypes(ranks: List[Optional[int]],
|
||||
dtypes: List[torch.dtype]
|
||||
) -> torch.dtype:
|
||||
"""Apply PyTorch dtype promotion rules and return the result type.
|
||||
|
||||
This function serves two purposes:
|
||||
1. It is handled in a special way during import into Torch-MLIR,
|
||||
generating `torch.promote_dtypes` ops
|
||||
2. Computes the actual promotion logic at the Python level in order
|
||||
to be able to test dtype calculation functions against PyTorch
|
||||
"""
|
||||
lhs_optional_rank = ranks[0]
|
||||
lhs_dtype = dtypes[0]
|
||||
for rhs_optional_rank, rhs_dtype in zip(ranks, dtypes):
|
||||
if lhs_optional_rank is None and rhs_optional_rank is None:
|
||||
lhs_dtype = _promote_scalar_scalar(lhs_dtype, rhs_dtype)
|
||||
elif lhs_optional_rank is None and rhs_optional_rank is not None:
|
||||
lhs_dtype = _promote_scalar_tensor(
|
||||
lhs_dtype, rhs_optional_rank, rhs_dtype)
|
||||
lhs_optional_rank = rhs_optional_rank
|
||||
elif lhs_optional_rank is not None and rhs_optional_rank is None:
|
||||
lhs_dtype = _promote_scalar_tensor(
|
||||
rhs_dtype, lhs_optional_rank, lhs_dtype)
|
||||
elif lhs_optional_rank is not None and rhs_optional_rank is not None:
|
||||
lhs_dtype = _promote_tensor_tensor(
|
||||
lhs_optional_rank, lhs_dtype, rhs_optional_rank, rhs_dtype)
|
||||
lhs_optional_rank = max(lhs_optional_rank, rhs_optional_rank)
|
||||
return lhs_dtype
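# A minimal sketch (not part of the library) of how `promote_dtypes` behaves
# when called eagerly, plus an illustrative dtype function that uses it. The
# `aten〇add〇Scalar〡dtype` name and signature below are assumptions about the
# registry entry for `aten::add.Scalar`; a real definition would be generated
# via `get_dtype_function_signature` and live in the library file itself.
def _demo_promote_dtypes() -> None:
    # A float64 scalar promoted with a rank-2 int64 tensor yields float32,
    # matching `torch.result_type` for tensor-scalar ops.
    assert promote_dtypes([None, 2], [torch.float64, torch.int64]) == torch.float32
    # Two scalars promote like two rank-0 tensors.
    assert promote_dtypes([None, None], [torch.int64, torch.float64]) == torch.float64

def aten〇add〇Scalar〡dtype(self_rank: int, self_dtype: int,
                            other: Union[int, float],
                            alpha: Union[int, float] = 1) -> int:
    # Promote the tensor dtype with the dtype implied by the scalar `other`.
    return promote_dtypes([self_rank, None],
                          [self_dtype, get_dtype_of_scalar(other)])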
|
||||
|
||||
def not_present_in_registry(f):
|
||||
"""Decorator for abstract interpretation functions not present in the registry.
|
||||
|
||||
This can happen for "valsem" ops that we have in Torch-MLIR, such as
|
||||
torch.valsem.aten.fill.Scalar, which are consistent with PyTorch conventions
|
||||
(e.g. being the value-semantic correspondent of torch.aten.fill_.Scalar),
|
||||
but that for whatever reason are not present in PyTorch. Such ops are useful
|
||||
to keep certain passes within Torch-MLIR as consistent as possible.
|
||||
For such ops, in the abstract interpretation library generator, we treat them as if they
|
||||
were registered torch ops (so we don't put "valsem" on them), to keep the
|
||||
generator consistent.
|
||||
|
||||
To check if this decorator has been applied, use
|
||||
`hasattr(f, "_not_present_in_registry")`.
|
||||
"""
|
||||
f._not_present_in_registry = None
|
||||
return f
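# Illustrative usage (a sketch, using the `torch.valsem.aten.fill.Scalar`
# example from the docstring above; note that the function is named as if the
# op were a registered `aten` op, per the convention described there):
@not_present_in_registry
def aten〇fill〇Scalar〡shape(self: List[int], value: float) -> List[int]:
    return self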
|
||||
|
||||
def _verify_signature_matches_registry(f, registry: Registry):
|
||||
source = inspect.getsource(f)
|
||||
signature = None
|
||||
for line in source.splitlines():
|
||||
if line.startswith("def "):
|
||||
signature = line
|
||||
break
|
||||
assert signature is not None, f"Could not find signature for {f.__name__}"
|
||||
assert "〡" in signature, f"Malformed signature {signature}. Signature missing the character `〡`"
|
||||
function_name, function_kind = f.__name__.split("〡")
|
||||
atoms = function_name.split("〇")
|
||||
if len(atoms) == 2:
|
||||
atoms += [""]
|
||||
operator = registry.get_by_triple(tuple(atoms))
|
||||
if function_kind == "shape":
|
||||
expected_signature = operator.get_shape_function_signature()
|
||||
elif function_kind == "dtype":
|
||||
expected_signature = operator.get_dtype_function_signature()
|
||||
elif function_kind == "decomposition":
|
||||
expected_signature = operator.get_decomposition_function_signature()
|
||||
else:
|
||||
raise ValueError(f"Invalid Op signature function kind: '{function_kind}'")
|
||||
if signature != expected_signature:
|
||||
raise ValueError(f"Signature mismatch for {f.__name__!r}: expected {expected_signature!r}, got {signature!r}")
|
||||
|
||||
def generate_library(globals_) -> str:
|
||||
"""Convert all op functions in `globals()` into MLIR."""
|
||||
mb = ModuleBuilder()
|
||||
# We use the registry to ensure that the shape functions are consistent
|
||||
# with the ops.
|
||||
registry = Registry.load()
|
||||
for k, v in globals_.items():
|
||||
if "〇" not in k:
|
||||
continue
|
||||
if not hasattr(v, "_not_present_in_registry"):
|
||||
_verify_signature_matches_registry(v, registry)
|
||||
# Add it to the compilation unit.
|
||||
torch.jit.script(v)
|
||||
for function in torch.jit._state._python_cu.get_functions():
|
||||
# Calls to the function `__torch_mlir_internal_promote_dtypes`
|
||||
# will get converted to the torch-dialect op `torch.promote_dtypes`
|
||||
# during import, so there is no need to import the actual
|
||||
# function.
|
||||
if function.name == "__torch_mlir_internal_promote_dtypes":
|
||||
continue
|
||||
mb.import_function(function)
|
||||
# Clean up the IR a bit before writing it out.
|
||||
pm = PassManager.parse("builtin.module(canonicalize)", context=mb.module.context)
|
||||
pm.run(mb.module)
|
||||
# Munge the IR a bit to make it more systematically accessible.
|
||||
asm = mb.module.operation.get_asm()
|
||||
# We'd like a unique function prefix to avoid collisions with user-
|
||||
# defined symbols. Since all of our shape functions conveniently have
|
||||
# a `〇` in them, we replace the torch namespace with our prefix. E.g.:
|
||||
# __torch__.aten〇add〇Scalar〡shape -> __torch_mlir_shape_fn.aten〇add〇Scalar
|
||||
asm = re.sub(r"__torch__\.([^.(]+)\\E3\\80\\87([^.(]+)\\E3\\80\\A1([^.(\"]+)",
|
||||
r"__torch_mlir_\3_fn.\1\\E3\\80\\87\2",
|
||||
asm)
|
||||
|
||||
# Put the `〇` back to a regular `.`.
|
||||
asm = asm.replace("\\E3\\80\\87", ".")
|
||||
return asm
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
"""Access to the Torch JIT operator registry."""
|
||||
|
||||
from typing import Dict, List, Tuple, Union
|
||||
from typing import Dict, List, Tuple, Union, Callable
|
||||
|
||||
import io
|
||||
import itertools
|
||||
|
@ -15,6 +15,48 @@ from .utils import TextEmitter
|
|||
# Note that this utility exists only in the c-extension.
|
||||
from torch_mlir._mlir_libs._jit_ir_importer import get_registered_ops # pytype: disable=import-error
|
||||
|
||||
def _rename_python_keyword_parameter_name(parameter_name: str) -> str:
|
||||
if parameter_name == "from":
|
||||
parameter_name = "from_" # Avoid using a Python keyword.
|
||||
return parameter_name
|
||||
|
||||
def _get_default_value(arg: "SIG_ATTR_TYPE") -> str:
|
||||
default = ""
|
||||
if "default_debug" in arg:
|
||||
if "List" in arg["pytype"]:
|
||||
# TorchScript doesn't allow lists as default parameters due
|
||||
# to the weird Python semantics of mutable default
|
||||
# arguments. So munge them into tuples, which work
|
||||
# fine here. We only need these to simplify the invocation
|
||||
# of the abstract interpretation functions as valid Python for
|
||||
# testing against the real ops, and tuples work fine in all
|
||||
# the places this kicks in (e.g. conv dilations -- we aren't
|
||||
# mutating those lists).
|
||||
default_debug = arg["default_debug"].replace(
|
||||
'[', '(').replace(']', ')')
|
||||
elif arg["pytype"] == "str":
|
||||
default_debug = repr(arg["default_debug"]).replace("'", '"')
|
||||
else:
|
||||
default_debug = arg["default_debug"]
|
||||
default = f" = {default_debug}"
|
||||
return default
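# A small sketch of the munging above, using a hypothetical argument record:
# a list default like `[1, 1]` becomes the tuple literal `(1, 1)` in the
# emitted signature.
def _demo_get_default_value() -> None:
    arg = {"name": "dilation", "pytype": "List[int]", "default_debug": "[1, 1]"}
    assert _get_default_value(arg) == " = (1, 1)"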
|
||||
|
||||
def _pytype_to_fn_pytype_common(pytype: str) -> str:
|
||||
if "number" in pytype:
|
||||
return pytype.replace("number", "Union[int, float]")
|
||||
# `torch.device` is lowercase.
|
||||
if pytype == "Device":
|
||||
return "device"
|
||||
if pytype == "Optional[Device]":
|
||||
return "Optional[device]"
|
||||
# Generators don't contribute to shapes, and are not scriptable currently.
|
||||
# So just hack them to be passed as "Any".
|
||||
if pytype == "Generator":
|
||||
return "Any"
|
||||
if pytype == "Optional[Generator]":
|
||||
return "Any"
|
||||
return pytype
|
||||
|
||||
def _pytype_to_shape_fn_pytype(pytype: str) -> str:
|
||||
"""Convert a JitOperator pytype to the type relevant in shape functions.
|
||||
|
||||
|
@ -28,31 +70,29 @@ def _pytype_to_shape_fn_pytype(pytype: str) -> str:
|
|||
# logically real-valued), so it doesn't really matter much, and
|
||||
# `float` helps make it clearer that it's not part of the shape
|
||||
# function.
|
||||
if pytype == "number":
|
||||
return "float"
|
||||
if pytype == "Optional[number]":
|
||||
return "Optional[float]"
|
||||
# `torch.device` is lowercase.
|
||||
if pytype == "Device":
|
||||
return "device"
|
||||
if pytype == "Optional[Device]":
|
||||
return "Optional[device]"
|
||||
# Shape functions only care about the shape of tensors.
|
||||
if pytype == "Tensor":
|
||||
return "List[int]"
|
||||
if pytype == "Optional[Tensor]":
|
||||
return "Optional[List[int]]"
|
||||
if pytype == "List[Tensor]":
|
||||
return "List[List[int]]"
|
||||
if pytype == "List[Optional[Tensor]]":
|
||||
return "List[Optional[List[int]]]"
|
||||
# Generators don't contribute to shapes, and are not scriptable currently.
|
||||
# So just hack them to be passed as "Any".
|
||||
if pytype == "Generator":
|
||||
return "Any"
|
||||
if pytype == "Optional[Generator]":
|
||||
return "Any"
|
||||
return pytype
|
||||
# TODO: This is no longer needed. Scalars can be represented by
|
||||
# Union[int, float].
|
||||
if "number" in pytype:
|
||||
return pytype.replace("number", "float")
|
||||
if "Tensor" in pytype:
|
||||
return pytype.replace("Tensor", "List[int]")
|
||||
return _pytype_to_fn_pytype_common(pytype)
|
||||
|
||||
def _pytype_to_dtype_fn_pytype(pytype: str) -> str:
|
||||
"""Convert a JitOperator pytype to the type relevant in dtype functions.
|
||||
|
||||
In particular, this converts `Tensor` to `int`, along with a few
|
||||
other special cases.
|
||||
"""
|
||||
# Dtype functions only care about the rank and dtype of tensors.
|
||||
if "Tensor" in pytype:
|
||||
return pytype.replace("Tensor", "int")
|
||||
return _pytype_to_fn_pytype_common(pytype)
|
||||
|
||||
def _pytype_to_decomposition_fn_pytype(pytype: str) -> str:
|
||||
"""Convert a JitOperator pytype to the type relevant in decomposition functions.
|
||||
"""
|
||||
return _pytype_to_fn_pytype_common(pytype)
|
||||
|
||||
class JitOperator:
|
||||
"""Information about a single registered `torch::jit::Operator`"""
|
||||
|
@ -142,6 +182,23 @@ class JitOperator:
|
|||
cpp_class_name = cpp_class_name.lstrip("_")
|
||||
return op_name, cpp_class_name
|
||||
|
||||
def _get_function_signature(self, function_kind: str,
|
||||
parameter_decl_builder: Callable["SIG_ATTR_TYPE", str],
|
||||
ret_decl_builder: Callable["SIG_ATTR_TYPE", str]) -> str:
|
||||
mlir_op_name, _ = self.get_mlir_names()
|
||||
# Replace `.` with a valid Python identifier character.
|
||||
# `〇` vaguely looks like `.`.
|
||||
def_name = "〇".join(mlir_op_name.split("."))
|
||||
def_name += f"〡{function_kind}"
|
||||
parameter_decls = list(map(parameter_decl_builder, self.arguments))
|
||||
ret_decls = list(map(ret_decl_builder, self.returns))
|
||||
parameters = ", ".join(parameter_decls)
|
||||
result = ", ".join(ret_decls)
|
||||
if len(ret_decls) >= 2:
|
||||
result = f"Tuple[{result}]"
|
||||
|
||||
return f"def {def_name}({parameters}) -> {result}:"
|
||||
|
||||
def get_shape_function_signature(self):
|
||||
"""Gets the Python function signature for this op's shape function.
|
||||
|
||||
|
@ -150,45 +207,66 @@ class JitOperator:
|
|||
ops have extra default arguments and stuff that are tedious to write out
|
||||
right.
|
||||
"""
|
||||
mlir_op_name, _ = self.get_mlir_names()
|
||||
# Replace `.` with a valid Python identifier character.
|
||||
# `〇` vaguely looks like `.`.
|
||||
def_name = "〇".join(mlir_op_name.split("."))
|
||||
parameter_decls = []
|
||||
for arg in self.arguments:
|
||||
def parameter_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
pytype = _pytype_to_shape_fn_pytype(arg["pytype"])
|
||||
default = ""
|
||||
if "default_debug" in arg:
|
||||
if "List" in arg["pytype"]:
|
||||
# TorchScript doesn't allow lists as default parameters due
|
||||
# to the weird Python semantics of mutable default
|
||||
# arguments. So munge them into tuples, which work
|
||||
# fine here. We only need these to simplify the invocation
|
||||
# of the shape functions as valid Python for testing against
|
||||
# the real ops, and tuples work fine in all the places this
|
||||
# kicks in (e.g. conv dilations -- we aren't mutating those
|
||||
# lists).
|
||||
default_debug = arg["default_debug"].replace(
|
||||
'[', '(').replace(']', ')')
|
||||
elif arg["pytype"] == "str":
|
||||
default_debug = repr(arg["default_debug"]).replace("'", '"')
|
||||
else:
|
||||
default_debug = arg["default_debug"]
|
||||
default = f" = {default_debug}"
|
||||
parameter_name = arg["name"]
|
||||
if parameter_name == "from":
|
||||
parameter_name = "from_" # Avoid using a Python keyword.
|
||||
parameter_decls.append(f"{parameter_name}: {pytype}{default}")
|
||||
ret_decls = []
|
||||
for ret in self.returns:
|
||||
pytype = _pytype_to_shape_fn_pytype(ret["pytype"])
|
||||
ret_decls.append(f"{pytype}")
|
||||
parameters = ", ".join(parameter_decls)
|
||||
result = ", ".join(ret_decls)
|
||||
if len(ret_decls) >= 2:
|
||||
result = f"Tuple[{result}]"
|
||||
default = _get_default_value(arg)
|
||||
parameter_name = _rename_python_keyword_parameter_name(arg["name"])
|
||||
return f"{parameter_name}: {pytype}{default}"
|
||||
|
||||
return f"def {def_name}({parameters}) -> {result}:"
|
||||
def ret_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
return _pytype_to_shape_fn_pytype(arg["pytype"])
|
||||
|
||||
return self._get_function_signature(
|
||||
"shape", parameter_decl_builder, ret_decl_builder)
|
||||
|
||||
def get_dtype_function_signature(self):
|
||||
"""Gets the Python function signature for this op's dtype function.
|
||||
|
||||
While this is technically debug-only output, it is useful to copy-paste
|
||||
it from the debug dump into the library definitions, as many
|
||||
ops have extra default arguments and stuff that are tedious to write out
|
||||
right.
|
||||
"""
|
||||
def parameter_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
pytype = _pytype_to_dtype_fn_pytype(arg["pytype"])
|
||||
default = _get_default_value(arg)
|
||||
parameter_name = _rename_python_keyword_parameter_name(arg["name"])
|
||||
if "Tensor" in arg["pytype"]:
|
||||
return ", ".join([f"{parameter_name}_rank: {pytype}{default}",
|
||||
f"{parameter_name}_dtype: {pytype}{default}"])
|
||||
return f"{parameter_name}: {pytype}{default}"
|
||||
|
||||
def ret_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
# The dtype function is expected to return dtypes for scalar
|
||||
# results of type `number`. Here we handle this case because
|
||||
# `_pytype_to_dtype_fn_pytype` will replace `number` with
|
||||
# `Union[int, float]`.
|
||||
if arg["pytype"] == "number":
|
||||
return "int"
|
||||
return _pytype_to_dtype_fn_pytype(arg["pytype"])
|
||||
|
||||
return self._get_function_signature(
|
||||
"dtype", parameter_decl_builder, ret_decl_builder)
|
||||
|
||||
def get_decomposition_function_signature(self):
|
||||
"""Gets the Python function signature for this op's decomposition function.
|
||||
|
||||
While this is technically debug-only output, it is useful to copy-paste
|
||||
it from the debug dump into the shape library definitions, as many
|
||||
ops have extra default arguments and stuff that are tedious to write out
|
||||
right.
|
||||
"""
|
||||
def parameter_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
pytype = _pytype_to_decomposition_fn_pytype(arg["pytype"])
|
||||
default = _get_default_value(arg)
|
||||
parameter_name = _rename_python_keyword_parameter_name(arg["name"])
|
||||
return f"{parameter_name}: {pytype}{default}"
|
||||
|
||||
def ret_decl_builder(arg: "SIG_ATTR_TYPE") -> str:
|
||||
return _pytype_to_decomposition_fn_pytype(arg["pytype"])
|
||||
|
||||
return self._get_function_signature(
|
||||
"decomposition", parameter_decl_builder, ret_decl_builder)
|
||||
|
||||
def __repr__(self):
|
||||
f = io.StringIO()
|
||||
|
@ -212,6 +290,10 @@ class JitOperator:
|
|||
p(f"is_mutable = {self.is_mutable}")
|
||||
if any(ret["type"] == "Tensor" for ret in self.returns):
|
||||
p(f"shape_function_signature = {self.get_shape_function_signature()}")
|
||||
p(f"decomposition_function_signature = {self.get_decomposition_function_signature()}")
|
||||
if any(ret["type"] in ["Tensor", "Scalar"] for ret in self.returns):
|
||||
p(f"dtype_function_signature = {self.get_dtype_function_signature()}")
|
||||
|
||||
if not self.arguments:
|
||||
p("arguments = []")
|
||||
else:
|
||||
|
@ -300,12 +382,13 @@ class Registry:
|
|||
return self.by_triple[key]
|
||||
|
||||
|
||||
# A List[Dict[str, _]] mapping attribute names to:
|
||||
# A Dict[str, _] mapping attribute names to:
|
||||
# - str (e.g. {'name': 'dim'} )
|
||||
# - int (e.g. {'N': 1} )
|
||||
# - Dict[str, List[str]]
|
||||
# (e.g. {'alias_info': {'before': ['alias::a'], 'after': ['alias::a']}} )
|
||||
SIGLIST_TYPE = List[Dict[str, Union[str, int, Dict[str, List[str]]]]]
|
||||
SIG_ATTR_TYPE = Dict[str, Union[str, int, Dict[str, List[str]]]]
|
||||
SIGLIST_TYPE = List[SIG_ATTR_TYPE]
|
||||
# A Dict[str, _] describing a registered op. Each field is either
|
||||
# - bool (e.g. {'is_mutable': False} )
|
||||
# - Tuple[str] (e.g. {'name': ('aten::size', 'int')} )
|
||||
|
|
File diff suppressed because it is too large
|
@ -0,0 +1,315 @@
|
|||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
# Also available under a BSD-style license. See LICENSE.
|
||||
|
||||
from typing import Any, List, Iterable, Optional, Callable
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
|
||||
# ==============================================================================
|
||||
# Shape, dtype, and decomposition function testing infrastructure.
|
||||
# ==============================================================================
|
||||
|
||||
# We expect all functions to be adequately tested. For functions
|
||||
# implemented with upstream helpers, additional testing is usually not needed.
|
||||
# But for functions that are authored/maintained by the Torch-MLIR
|
||||
# project, we expect adequate testing.
|
||||
#
|
||||
# To do this, we provide decorators
|
||||
# - `@check_shape_function`
|
||||
# - `@check_dtype_function`
|
||||
# - `@check_decomposition_function`
|
||||
# which can be used to specify a series of operator invocations (such as "call
|
||||
# this operator with two arguments -- a first tensor of size [2, 3] and a second
|
||||
# tensor of size [3, 4]"). These tests are then run as part of this script, and
|
||||
# any mismatches from the real op's behavior will be reported.
|
||||
#
|
||||
# A typical use of the decorator might look like:
|
||||
# ```
|
||||
# @check_shape_function([
|
||||
# Invocation(TensorOfShape(2, 3, 4)), # Basic case.
|
||||
# Invocation(TensorOfShape(2, 3, 4), dim=0), # Test explicit `dim`.
|
||||
# Invocation(TensorOfShape(2, 3, 4), dim=0, keepdim=True), # `keepdim`.
|
||||
# Invocation(TensorOfShape(2, 3, 4), dim=-3), # Negative `dim`.
|
||||
# Invocation(TensorOfShape(2, 3, 4), dim=2), # Maximum valid `dim`.
|
||||
# ErrorInvocation(TensorOfShape(2, 3, 4), dim=-4), # `dim` out of bounds.
|
||||
# ErrorInvocation(TensorOfShape(2, 3, 4), dim=3), # `dim` out of bounds.
|
||||
# ])
|
||||
# ```
|
||||
# Each `Invocation` takes a list of args/kwargs which will be passed to both the
|
||||
# shape function and the real op and the results compared.
|
||||
# We expect both the successful and error cases to be tested.
|
||||
#
|
||||
# The typical iteration flow is to add invocations to the list and then re-run
|
||||
# `build_tools/update_abstract_interp_lib.sh` to re-run the tests.
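#
# Dtype functions are tested the same way, with tensors described by their
# rank and dtype. A sketch (illustrative only; a real dtype function for
# `tanh` would also need to handle integer inputs, which PyTorch promotes to
# float):
# ```
# @check_dtype_function([
#     Invocation(NonZeroDTensorWithDtype(torch.float32)),
#     Invocation(NonZeroDTensorWithDtype(torch.float64)),
# ])
# def aten〇tanh〡dtype(self_rank: int, self_dtype: int) -> int:
#     return self_dtype
# ```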
|
||||
|
||||
class TensorOfShape:
|
||||
"""Symbolic placeholder for a tensor argument to an operation.
|
||||
|
||||
Shape functions take tensor arguments as `List[int]`, whereas the real ops
|
||||
take them as `Tensor`, so we need a symbolic representation of a tensor
|
||||
argument to an op in order to represent an invocation that can drive both
|
||||
the shape function and the real op (see `Invocation`).
|
||||
|
||||
A plain list doesn't work, because plain lists are actually legal arguments
|
||||
to a shape function (e.g. conv dilations), and we don't want them to receive
|
||||
this special treatment.
|
||||
|
||||
This class also tracks a dtype of the tensor, since some ops require a
|
||||
specific dtype.
|
||||
"""
|
||||
def __init__(self, *shape: int, dtype: torch.dtype = torch.float32):
|
||||
self.shape = list(shape)
|
||||
self.dtype = dtype
|
||||
def __repr__(self):
|
||||
args_str = ", ".join(repr(x) for x in self.shape)
|
||||
if self.dtype is torch.float32:
|
||||
return f"TensorOfShape({args_str})"
|
||||
else:
|
||||
return f"TensorOfShape({args_str}, dtype={self.dtype})"
|
||||
|
||||
def LongTensorOfShape(*args, **kwargs):
|
||||
"""Helper for indicating a TensorOfShape with integer type."""
|
||||
return TensorOfShape(*args, **kwargs, dtype=torch.long)
|
||||
|
||||
def NonZeroDTensorWithDtype(dtype):
|
||||
"""Helper for indicating a non-zero dim tensor with custom type."""
|
||||
return TensorOfShape(1, dtype=dtype)
|
||||
|
||||
def ZeroDTensorWithDtype(dtype):
|
||||
"""Helper for indicating a zero dim tensor with custom type."""
|
||||
return TensorOfShape(dtype=dtype)
|
||||
|
||||
def _recursively_transform_tensor_args(
|
||||
o: Any,
|
||||
tensor_transformer: Callable[[TensorOfShape], Any]) -> Any:
|
||||
"""Replace `TensorOfShape` with the result of `tensor_transformer`"""
|
||||
if o is None or isinstance(o, (float, int)):
|
||||
return o
|
||||
if isinstance(o, TensorOfShape):
|
||||
return tensor_transformer(o)
|
||||
if isinstance(o, list):
|
||||
return [_recursively_transform_tensor_args(x, tensor_transformer) for x in o]
|
||||
if isinstance(o, tuple):
|
||||
return tuple(_recursively_transform_tensor_args(x, tensor_transformer) for x in o)
|
||||
raise Exception(f"Unhandled type {type(o)}")
|
||||
|
||||
def _convert_to_dtype_function_args(arguments: Iterable[Any]) -> List[Any]:
|
||||
"""Converts an Invocation argument to a dtype function argument.
|
||||
|
||||
TensorOfShape is replaced with two ints representing the rank
|
||||
and dtype of the tensor, respectively.
|
||||
"""
|
||||
def contains_tensor(o: Any) -> bool:
|
||||
if o is None or isinstance(o, (float, int)):
|
||||
return False
|
||||
if isinstance(o, TensorOfShape):
|
||||
return True
|
||||
if isinstance(o, (list, tuple)):
|
||||
for elem in o:
|
||||
if contains_tensor(elem):
|
||||
return True
|
||||
return False
|
||||
raise Exception(f"Unhandled type {type(o)}")
|
||||
|
||||
result = []
|
||||
for arg in arguments:
|
||||
if contains_tensor(arg):
|
||||
rank_arg = _recursively_transform_tensor_args(
|
||||
arg, lambda x: len(x.shape))
|
||||
dtype_arg = _recursively_transform_tensor_args(
|
||||
arg, lambda x: x.dtype)
|
||||
result += [rank_arg, dtype_arg]
|
||||
else:
|
||||
result.append(arg)
|
||||
return result
|
||||
|
||||
class Invocation:
|
||||
"""Representation of a single op invocation (i.e. list of args to the op).
|
||||
|
||||
This class is used to represent a single invocation of an op in a way that
|
||||
we can use to both invoke the abstract interpretation function and invoke
|
||||
the actual op, which have slightly different signatures.
|
||||
|
||||
Specifically, this class has special knowledge of `TensorOfShape` and
|
||||
translates it appropriately to a tensor (for the real op), a
|
||||
`List[int]` (for a shape function), or two `int`s representing
|
||||
the tensor rank and dtype (for a dtype function).
|
||||
|
||||
This class also tracks whether the invocation is expected to raise an
|
||||
exception for greater precision when interpreting errors raised during
|
||||
testing.
|
||||
"""
|
||||
def __init__(self, *args: Any, **kwargs: Any):
|
||||
self.args = list(args)
|
||||
# We assume kwargs don't contain tensors, so they don't need any
|
||||
# special handling.
|
||||
self.kwargs = kwargs
|
||||
|
||||
def is_expected_to_raise_exception(self) -> bool:
|
||||
"""Returns true if the invocation is expected to raise an exception.
|
||||
|
||||
The subclass ErrorInvocation overrides this to indicate an Invocation
|
||||
that is expected to raise an exception.
|
||||
"""
|
||||
return False
|
||||
|
||||
def to_shape_function_args(self):
|
||||
"""Gets positional arguments appropriate for a shape function."""
|
||||
# Make a copy of the size list, since a shape function might
|
||||
# modify it in-place. In the compiler, the lowering always
|
||||
# produces a new list via a fresh invocation of `AtenSizeOp`,
|
||||
# which allocates a new, unaliased list. So in-place mutations
|
||||
# are ok since they make it a bit easier to write some shape
|
||||
# functions.
|
||||
tensor_transformer = lambda o: list(o.shape)
|
||||
return _recursively_transform_tensor_args(
|
||||
self.args, tensor_transformer)
|
||||
|
||||
def to_dtype_function_args(self):
|
||||
"""Gets positional arguments appropriate for a dtype function."""
|
||||
return _convert_to_dtype_function_args(self.args)
|
||||
|
||||
def to_real_op_args(self):
|
||||
"""Gets positional arguments appropriate for the real op."""
|
||||
tensor_transformer = lambda o: torch.ones(o.shape, dtype=o.dtype)
|
||||
return _recursively_transform_tensor_args(self.args, tensor_transformer)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
args_str = ", ".join(repr(x) for x in self.args)
|
||||
kwargs_str = ""
|
||||
if self.kwargs:
|
||||
kwargs_str = ", " + ", ".join(f"{k}={v}" for k, v in self.kwargs.items())
|
||||
return f"Invocation({args_str}{kwargs_str})"
|
||||
|
||||
class ErrorInvocation(Invocation):
|
||||
"""An Invocation that raises an exception.
|
||||
|
||||
Explicitly knowing that an invocation is expected to raise an exception
|
||||
avoids certain failure modes of the test infrastructure where a bug
|
||||
slips through when both the abstract interpretation function and the real
|
||||
op raise exceptions due to independent bugs (that cancel each other out and
|
||||
spuriously make the two appear to "agree" that an exception needs to be
|
||||
raised).
|
||||
"""
|
||||
def is_expected_to_raise_exception(self) -> bool:
|
||||
return True
|
||||
|
||||
def _normalize_multiple_results_to_list(t: Any):
|
||||
"""Returns a flat list of results.
|
||||
|
||||
This normalizes the fact that Python represents multiple returns with a
|
||||
tuple, but single returns as a single value. We just want a list with
|
||||
N elements for N results.
|
||||
"""
|
||||
if isinstance(t, tuple):
|
||||
return list(t)
|
||||
# Shape functions return List[int] instead of tensors.
|
||||
if isinstance(t, (Tensor, list, torch.dtype, int, float)):
|
||||
return [t]
|
||||
raise ValueError(f"Unexpected type {type(t)}")
|
||||
|
||||
def _report(f, invocation: Invocation, error_message: str):
|
||||
fn_type = f.__name__.split("〡")[-1]
|
||||
raise ValueError(f"For {fn_type} function {f.__name__!r} with invocation {invocation}: {error_message}")
|
||||
|
||||
def _get_fn_and_golden_results(f, invocation: Invocation):
|
||||
"""Run the invocation on the library function and torch op.
|
||||
|
||||
If no unexpected errors are detected, returns a tuple with the first
|
||||
element being the results from the library function and the second
|
||||
element being the results from the torch op. The results will be `None`
|
||||
if the library function and torch op expectedly result in errors.
|
||||
"""
|
||||
fn_name_without_fn_type, fn_type = f.__name__.split("〡")
|
||||
fn_name_parts = fn_name_without_fn_type.split("〇")
|
||||
ns, unqual = fn_name_parts[:2]
|
||||
overload = "default" if len(fn_name_parts) != 3 else fn_name_parts[-1]
|
||||
op = getattr(getattr(getattr(torch.ops, ns), unqual), overload)
|
||||
fn_error, op_error, fn_results, golden_results = None, None, None, None
|
||||
try:
|
||||
fn_results = _normalize_multiple_results_to_list(f(
|
||||
*(getattr(invocation, f"to_{fn_type}_function_args")()),
|
||||
**invocation.kwargs))
|
||||
except Exception as e:
|
||||
fn_error = f"{e}"
|
||||
try:
|
||||
golden_results = _normalize_multiple_results_to_list(op(
|
||||
*invocation.to_real_op_args(),
|
||||
**invocation.kwargs))
|
||||
except Exception as e:
|
||||
op_error = f"{e}"
|
||||
|
||||
# Check for error behavior.
|
||||
if invocation.is_expected_to_raise_exception():
|
||||
if fn_error is None and op_error is None:
|
||||
_report(f, invocation, f"Expected to raise an exception, but neither {fn_type} function n or op raised an exception")
|
||||
if fn_error is None:
|
||||
_report(f, invocation, f"Op raised error {op_error!r}, but shape function did not.")
|
||||
if op_error is None:
|
||||
_report(f, invocation, f"{fn_type} function raised error {fn_error!r}, but op did not.")
|
||||
else:
|
||||
if fn_error is not None and op_error is not None:
|
||||
_report(f, invocation, f"Both {fn_type} function and op raised errors, but were not expected to. {fn_type} function raised error {fn_error!r} and op raised error {op_error!r}.")
|
||||
if fn_error is not None:
|
||||
_report(f, invocation, f"{fn_type} function raised error {fn_error!r} but op did not raise any error.")
|
||||
if op_error is not None:
|
||||
_report(f, invocation, f"Op raised error {op_error!r} but {fn_type} function did not raise any error.")
|
||||
|
||||
return fn_results, golden_results
|
||||
|
||||
def check_shape_function(invocations: List[Invocation]):
|
||||
"""Decorator that automatically tests a shape function.
|
||||
|
||||
The shape function, which is expected to be named systematically with
|
||||
`〇` instead of `.`, is tested against the corresponding op in
|
||||
`torch.ops.*` using the given invocations.
|
||||
"""
|
||||
def decorator(f):
|
||||
for invocation in invocations:
|
||||
result_shapes, golden_results = _get_fn_and_golden_results(f, invocation)
|
||||
if invocation.is_expected_to_raise_exception():
|
||||
continue
|
||||
# Check for matching results.
|
||||
if len(result_shapes) != len(golden_results):
|
||||
_report(f, invocation, f"Expected {len(golden_results)} result shapes, got {len(result_shapes)}")
|
||||
for result_shape, golden_result in zip(result_shapes, golden_results):
|
||||
result_rank = len(result_shape)
|
||||
golden_rank = len(golden_result.shape)
|
||||
if result_rank != golden_rank:
|
||||
_report(f, invocation, f"Expected result rank {golden_rank}, got {result_rank}")
|
||||
for dimension_size, golden_dimension_size in zip(result_shape, golden_result.shape):
|
||||
if dimension_size != golden_dimension_size:
|
||||
_report(f, invocation, f"Expected result shape {golden_result.shape}, got {result_shape}")
|
||||
return f
|
||||
return decorator
|
||||
|
||||
def check_dtype_function(invocations: List[Invocation]):
|
||||
"""Decorator that automatically tests a dtype function.
|
||||
|
||||
The dtype function, which is expected to be named systematically with
|
||||
`〇` instead of `.`, is tested against the corresponding op in
|
||||
`torch.ops.*` using the given invocations.
|
||||
"""
|
||||
def decorator(f):
|
||||
for invocation in invocations:
|
||||
result_dtypes, golden_results = _get_fn_and_golden_results(f, invocation)
|
||||
if invocation.is_expected_to_raise_exception():
|
||||
continue
|
||||
|
||||
if len(result_dtypes) != len(golden_results):
|
||||
_report(f, invocation, f"Expected {len(golden_results)} result dtypes, got {len(result_dtypes)}")
|
||||
for result_dtype, golden_result in zip(result_dtypes, golden_results):
|
||||
if isinstance(golden_result, torch.Tensor):
|
||||
golden_dtype = golden_result.dtype
|
||||
elif isinstance(golden_result, (int, float)):
|
||||
# Turn Python type to PyTorch dtype
|
||||
golden_dtype = torch.tensor([]).to(type(golden_result)).dtype
|
||||
else:
|
||||
raise ValueError(f"Unhandled return type {type(golden_result)}")
|
||||
if result_dtype != golden_dtype:
|
||||
_report(f, invocation, f"Expected result dtype {golden_dtype}, got {result_dtype}")
|
||||
return f
|
||||
return decorator
|
|
@ -327,13 +327,26 @@ void NodeImporter::importNode(Node *node, MlirBlock appendToBlock,
    auto expectedTypes = c10::fmap(calleeEntryBlock->inputs(), [&](Value *v) {
      return getMlirTypeFromTorchType(loc, v->type(), importOptions);
    });
    MlirOperation operation = createMlirOperationAtEnd(
        appendToBlock, "func.call_indirect", loc,
        getMlirTypesFromValues(loc, node->outputs(), importOptions),
        lookupMappedValue(node->input(0)),
        adjustStaticInformationForValues(
            appendToBlock, loc, lookupMappedValues(node->inputs().slice(1)),
            expectedTypes, /*userAllowsRefinement=*/false));
    std::string functionName = node->input(0)->node()->s(c10::attr::name);
    std::vector<MlirType> resultTypes =
        getMlirTypesFromValues(loc, node->outputs(), importOptions);
    std::vector<MlirValue> adjustedFuncArgs = adjustStaticInformationForValues(
        appendToBlock, loc, lookupMappedValues(node->inputs().slice(1)),
        expectedTypes, /*userAllowsRefinement=*/false);
    MlirOperation operation;
    // `__torch_mlir_internal_promote_dtypes` is a special Python function that
    // users can use in dtype refinement function definitions to get the
    // promoted result dtype for a PyTorch computation. Here we turn the call
    // to this function into the equivalent torch dialect op
    // `torch.promote_dtypes`.
    if (functionName == "__torch_mlir_internal_promote_dtypes") {
      operation =
          createMlirOperationAtEnd(appendToBlock, "torch.promote_dtypes", loc,
                                   resultTypes, adjustedFuncArgs);
    } else {
      operation = createMlirOperationAtEnd(
          appendToBlock, "func.call_indirect", loc, resultTypes,
          lookupMappedValue(node->input(0)), adjustedFuncArgs);
    }
    mapResults(node, operation);
    return;
  }
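Before the test changes below, a hedged Python sketch of the user-facing side of this importer special case: a dtype function calls a promotion helper whose implementation bottoms out in `__torch_mlir_internal_promote_dtypes`, which the C++ above rewrites to `torch.promote_dtypes`. The wrapper name `promote_dtypes` and its (ranks, dtypes) argument order are taken from the library_generator checks later in this diff; the op and argument names are illustrative, not part of the commit.

from typing import List, Optional

# Sketch only: assumes `promote_dtypes(ranks, dtypes) -> int` as suggested by
# the reified IR below; `aten〇floor_divide` is used purely as an example.
def aten〇floor_divide(self_rank: int, self_dtype: int,
                       other_rank: int, other_dtype: int) -> int:
    ranks: List[Optional[int]] = [self_rank, other_rank]
    dtypes: List[int] = [self_dtype, other_dtype]
    # PyTorch-style type promotion across the two operands; the importer turns
    # the underlying __torch_mlir_internal_promote_dtypes call into the
    # torch.promote_dtypes op shown in the C++ above.
    return promote_dtypes(ranks, dtypes)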
@ -1,11 +1,11 @@
// RUN: torch-mlir-opt -torch-drop-shape-calculations -split-input-file %s | FileCheck %s
// RUN: torch-mlir-opt -torch-drop-abstract-interp-calculations -split-input-file %s | FileCheck %s

// CHECK-LABEL: func.func @basic(
// CHECK-LABEL: func.func @basic$shape_calculate(
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<[2,?],unk>) -> !torch.vtensor {
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG]] : !torch.vtensor<[2,?],unk> -> !torch.vtensor<[2,?],unk>
// CHECK: %[[ERASED:.*]] = torch.tensor_static_info_cast %[[TANH]] : !torch.vtensor<[2,?],unk> to !torch.vtensor
// CHECK: return %[[ERASED]] : !torch.vtensor
func.func @basic(%arg0: !torch.vtensor<[2,?],unk>) -> !torch.vtensor {
func.func @basic$shape_calculate(%arg0: !torch.vtensor<[2,?],unk>) -> !torch.vtensor {
  %int2 = torch.constant.int 2
  %int1 = torch.constant.int 1
  %0 = torch.shape.calculate {
@ -22,6 +22,26 @@ func.func @basic(%arg0: !torch.vtensor<[2,?],unk>) -> !torch.vtensor {

// -----

// CHECK-LABEL: func.func @basic$dtype_calculate(
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<*,f32>) -> !torch.vtensor {
// CHECK: %[[INT_6:.*]] = torch.constant.int 6
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
// CHECK: %[[CAST:.*]] = torch.tensor_static_info_cast %[[TANH]] : !torch.vtensor<*,f32> to !torch.vtensor
// CHECK: return %[[CAST]] : !torch.vtensor
func.func @basic$dtype_calculate(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
  %int6 = torch.constant.int 6
  %0 = torch.dtype.calculate {
    %2 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
    torch.dtype.calculate.yield %2 : !torch.vtensor<*,f32>
  } dtypes {
    torch.dtype.calculate.yield.dtypes %int6 : !torch.int
  } : !torch.vtensor<*,f32>
  %1 = torch.tensor_static_info_cast %0 : !torch.vtensor<*,f32> to !torch.vtensor
  return %1 : !torch.vtensor
}

// -----

// CHECK-LABEL: func.func @shape_calc_in_loop(
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<[2,?],unk>) -> !torch.vtensor<[2,?],unk> {
func.func @shape_calc_in_loop(%arg: !torch.vtensor<[2,?],unk>) -> !torch.vtensor<[2,?],unk> {
@ -142,6 +142,22 @@ func.func @shape_calculations(%arg0: !torch.vtensor) -> !torch.vtensor {
  return %0 : !torch.vtensor
}

func.func @dtype_calculations(%arg0: !torch.vtensor) -> !torch.vtensor {
  %0 = torch.dtype.calculate {
    %1 = torch.aten.tanh %arg0 : !torch.vtensor -> !torch.vtensor
    torch.dtype.calculate.yield %1 : !torch.vtensor
  } dtypes {
    %2 = torch.prim.dtype %arg0 : !torch.vtensor -> !torch.int
    torch.dtype.calculate.yield.dtypes %2 : !torch.int
  } : !torch.vtensor
  return %0 : !torch.vtensor
}

func.func @promote_dtypes(%ranks: !torch.list<optional<int>>, %dtypes: !torch.list<int>) -> !torch.int {
  %0 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
  return %0 : !torch.int
}

func.func @number_type_subtypes(%arg0: !torch.tensor, %arg1: !torch.list<int>, %arg2: !torch.union<float, int>) {
  %0 = torch.aten.constant_pad_nd %arg0, %arg1, %arg2 : !torch.tensor, !torch.list<int>, !torch.union<float, int> -> !torch.tensor
  return
@ -1,109 +0,0 @@
|
|||
// RUN: torch-mlir-opt -torch-refine-types -split-input-file %s | FileCheck %s
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_tensor$same_category_different_width(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],f32>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.vtensor<[1],f64>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.float) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Tensor %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],f32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],f64>
|
||||
// CHECK: return
|
||||
func.func @tensor_tensor$same_category_different_width(%t0: !torch.vtensor<[1],f32>,
|
||||
%t1: !torch.vtensor<[1],f64>,
|
||||
%alpha: !torch.float) {
|
||||
%1 = torch.aten.add.Tensor %t0, %t1, %alpha: !torch.vtensor<[1],f32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_tensor$different_category(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],si32>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.vtensor<[1],f64>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.float) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Tensor %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],si32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],f64>
|
||||
// CHECK: return
|
||||
func.func @tensor_tensor$different_category(%t0: !torch.vtensor<[1],si32>,
|
||||
%t1: !torch.vtensor<[1],f64>,
|
||||
%alpha: !torch.float) {
|
||||
%1 = torch.aten.add.Tensor %t0, %t1, %alpha: !torch.vtensor<[1],si32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_tensor$same_category_zero_rank_wider(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],f32>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.vtensor<[],f64>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.int) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Tensor %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[1],f32>
|
||||
// CHECK: return
|
||||
func.func @tensor_tensor$same_category_zero_rank_wider(
|
||||
%t0: !torch.vtensor<[1],f32>,
|
||||
%t1: !torch.vtensor<[],f64>,
|
||||
%alpha: !torch.int) {
|
||||
%1 = torch.aten.add.Tensor %t0, %t1, %alpha: !torch.vtensor<[1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_tensor$zero_rank_higher_category(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],si64>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.vtensor<[],f32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.int) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Tensor %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],si64>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1],f32>
|
||||
// CHECK: return
|
||||
func.func @tensor_tensor$zero_rank_higher_category(%t0: !torch.vtensor<[1],si64>,
|
||||
%t1: !torch.vtensor<[],f32>,
|
||||
%alpha: !torch.int) {
|
||||
%1 = torch.aten.add.Tensor %t0, %t1, %alpha: !torch.vtensor<[1],si64>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_tensor$alpha_wider_no_contribution(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],f32>, %[[VAL_1:.*]]: !torch.vtensor<[1],f32>,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.float) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Tensor %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.float -> !torch.vtensor<[1],f32>
|
||||
// CHECK: return
|
||||
func.func @tensor_tensor$alpha_wider_no_contribution(%t0: !torch.vtensor<[1],f32>,
|
||||
%t1: !torch.vtensor<[1],f32>,
|
||||
%alpha: !torch.float) {
|
||||
%1 = torch.aten.add.Tensor %t0, %t1, %alpha: !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_scalar$scalar_higher_category(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],si64>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.float,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.int) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Scalar %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],si64>, !torch.float, !torch.int -> !torch.vtensor<[1],f32>
|
||||
// CHECK: return
|
||||
func.func @tensor_scalar$scalar_higher_category(%t0: !torch.vtensor<[1],si64>, %scalar: !torch.float, %alpha: !torch.int) {
|
||||
%1 = torch.aten.add.Scalar %t0, %scalar, %alpha: !torch.vtensor<[1], si64>, !torch.float, !torch.int -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @tensor_scalar$scalar_same_category_wider(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[1],si32>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: !torch.int,
|
||||
// CHECK-SAME: %[[VAL_2:.*]]: !torch.int) {
|
||||
// CHECK: %[[VAL_3:.*]] = torch.aten.add.Scalar %[[VAL_0]], %[[VAL_1]], %[[VAL_2]] :
|
||||
// CHECK-SAME: !torch.vtensor<[1],si32>, !torch.int, !torch.int -> !torch.vtensor<[1],si32>
|
||||
// CHECK: return
|
||||
func.func @tensor_scalar$scalar_same_category_wider(%t0: !torch.vtensor<[1],si32>, %scalar: !torch.int, %alpha: !torch.int) {
|
||||
%1 = torch.aten.add.Scalar %t0, %scalar, %alpha: !torch.vtensor<[1], si32>, !torch.int, !torch.int -> !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
|
@ -121,14 +121,14 @@ func.func @prim.loop$region_arg_to_internal(%none: !torch.none) -> !torch.option
|
|||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @f
|
||||
// CHECK: %[[ATEN:.*]] = torch.aten.tanh %{{.*}} : !torch.vtensor -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[ATEN:.*]] = torch.aten.cos %{{.*}} : !torch.vtensor -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[CAST:.*]] = torch.tensor_static_info_cast %[[ATEN]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: return %[[CAST]] : !torch.vtensor
|
||||
func.func @f(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
%cast = torch.tensor_static_info_cast %arg0 : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
cf.br ^bb1(%cast: !torch.vtensor)
|
||||
^bb1(%arg1: !torch.vtensor):
|
||||
%1 = torch.aten.tanh %arg1 : !torch.vtensor -> !torch.vtensor
|
||||
%1 = torch.aten.cos %arg1 : !torch.vtensor -> !torch.vtensor
|
||||
return %1 : !torch.vtensor
|
||||
}
|
||||
|
||||
|
@ -136,11 +136,11 @@ func.func @f(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
|||
|
||||
// CHECK-LABEL: func.func @f
|
||||
// CHECK: func.func private @callee
|
||||
// CHECK-NEXT: torch.aten.tanh %{{.*}} : !torch.vtensor -> !torch.vtensor<*,f32>
|
||||
// CHECK-NEXT: torch.aten.cos %{{.*}} : !torch.vtensor -> !torch.vtensor<*,f32>
|
||||
func.func @f() {
|
||||
builtin.module {
|
||||
func.func private @callee(%arg0: !torch.vtensor) {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor -> !torch.vtensor
|
||||
%1 = torch.aten.cos %arg0 : !torch.vtensor -> !torch.vtensor
|
||||
return
|
||||
}
|
||||
func.func @caller(%arg0: !torch.vtensor<*,f32>) {
|
||||
|
|
|
@ -9,36 +9,36 @@
|
|||
// -----
|
||||
// CHECK-LABEL: func.func @basic(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[TANH]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: %[[COS:.*]] = torch.aten.cos %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[COS]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor
|
||||
func.func @basic(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
%1 = torch.aten.cos %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
return %1 : !torch.vtensor
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @keep_existing_shape_information(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<*,f32>) -> !torch.vtensor<[2],f32> {
|
||||
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<[2],f32>
|
||||
// CHECK: return %[[TANH]] : !torch.vtensor<[2],f32>
|
||||
// CHECK: %[[COS:.*]] = torch.aten.cos %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<[2],f32>
|
||||
// CHECK: return %[[COS]] : !torch.vtensor<[2],f32>
|
||||
func.func @keep_existing_shape_information(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor<[2],f32> {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor<[2], f32>
|
||||
%1 = torch.aten.cos %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor<[2], f32>
|
||||
return %1 : !torch.vtensor<[2],f32>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @propagate_through_multiple_ops(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
// CHECK: %[[TANH0:.*]] = torch.aten.tanh %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[TANH1:.*]] = torch.aten.tanh %[[TANH0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[TANH2:.*]] = torch.aten.tanh %[[TANH1]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[TANH3:.*]] = torch.tensor_static_info_cast %[[TANH2]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: return %[[TANH3]] : !torch.vtensor
|
||||
// CHECK: %[[COS0:.*]] = torch.aten.cos %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[COS1:.*]] = torch.aten.cos %[[COS0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[COS2:.*]] = torch.aten.cos %[[COS1]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[COS3:.*]] = torch.tensor_static_info_cast %[[COS2]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: return %[[COS3]] : !torch.vtensor
|
||||
func.func @propagate_through_multiple_ops(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
%2 = torch.aten.tanh %1 : !torch.vtensor -> !torch.vtensor
|
||||
%3 = torch.aten.tanh %2 : !torch.vtensor -> !torch.vtensor
|
||||
%1 = torch.aten.cos %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
%2 = torch.aten.cos %1 : !torch.vtensor -> !torch.vtensor
|
||||
%3 = torch.aten.cos %2 : !torch.vtensor -> !torch.vtensor
|
||||
return %3 : !torch.vtensor
|
||||
}
|
||||
|
||||
|
@ -47,99 +47,18 @@ func.func @propagate_through_multiple_ops(%arg0: !torch.vtensor<*,f32>) -> !torc
|
|||
// refinement.
|
||||
// CHECK-LABEL: func.func @mixed_allowing_not_allowing_type_refinement(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<*,f32>) -> (!torch.vtensor, !torch.vtensor) {
|
||||
// CHECK: %[[TANH0:.*]] = torch.aten.tanh %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[ERASED:.*]] = torch.tensor_static_info_cast %[[TANH0]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: %[[TANH1:.*]] = torch.aten.tanh %[[TANH0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[COS0:.*]] = torch.aten.cos %[[ARG0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: %[[ERASED:.*]] = torch.tensor_static_info_cast %[[COS0]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: %[[COS1:.*]] = torch.aten.cos %[[COS0]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: return %[[ERASED]], %[[ERASED]] : !torch.vtensor, !torch.vtensor
|
||||
func.func @mixed_allowing_not_allowing_type_refinement(%arg0: !torch.vtensor<*,f32>) -> (!torch.vtensor, !torch.vtensor) {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
%3 = torch.aten.tanh %1 : !torch.vtensor -> !torch.vtensor
|
||||
%1 = torch.aten.cos %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
%3 = torch.aten.cos %1 : !torch.vtensor -> !torch.vtensor
|
||||
return %1, %1 : !torch.vtensor, !torch.vtensor
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion$same_category_different_width(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<[?],si32>,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor<[?],si64>) -> !torch.vtensor<[?],unk> {
|
||||
// CHECK: %[[ALPHA:.*]] = torch.constant.int 3
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add.Tensor %[[ARG0]], %[[ARG1]], %[[ALPHA]] : !torch.vtensor<[?],si32>, !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?],si64>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[ADD]] : !torch.vtensor<[?],si64> to !torch.vtensor<[?],unk>
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor<[?],unk>
|
||||
func.func @type_promotion$same_category_different_width(%arg0: !torch.vtensor<[?],si32>, %arg1: !torch.vtensor<[?],si64>) -> !torch.vtensor<[?],unk> {
|
||||
%int3 = torch.constant.int 3
|
||||
%0 = torch.aten.add.Tensor %arg0, %arg1, %int3 : !torch.vtensor<[?],si32>, !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?],unk>
|
||||
return %0 : !torch.vtensor<[?],unk>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion$different_category(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<[?],si64>,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?],unk> {
|
||||
// CHECK: %[[ALPHA:.*]] = torch.constant.int 3
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add.Tensor %[[ARG0]], %[[ARG1]], %[[ALPHA]] : !torch.vtensor<[?],si64>, !torch.vtensor<[?],f32>, !torch.int -> !torch.vtensor<[?],f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[ADD]] : !torch.vtensor<[?],f32> to !torch.vtensor<[?],unk>
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor<[?],unk>
|
||||
func.func @type_promotion$different_category(%arg0: !torch.vtensor<[?],si64>, %arg1: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?],unk> {
|
||||
%int3 = torch.constant.int 3
|
||||
%0 = torch.aten.add.Tensor %arg0, %arg1, %int3 : !torch.vtensor<[?],si64>, !torch.vtensor<[?],f32>, !torch.int -> !torch.vtensor<[?],unk>
|
||||
return %0 : !torch.vtensor<[?],unk>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion$same_category_zero_rank_wider(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<[?],f32>,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor<[],f64>) -> !torch.vtensor<[?],unk> {
|
||||
// CHECK: %[[ALPHA:.*]] = torch.constant.float 2.300000e+00
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add.Tensor %[[ARG0]], %[[ARG1]], %[[ALPHA]] : !torch.vtensor<[?],f32>, !torch.vtensor<[],f64>, !torch.float -> !torch.vtensor<[?],f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[ADD]] : !torch.vtensor<[?],f32> to !torch.vtensor<[?],unk>
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor<[?],unk>
|
||||
func.func @type_promotion$same_category_zero_rank_wider(%arg0: !torch.vtensor<[?],f32>, %arg1: !torch.vtensor<[],f64>) -> !torch.vtensor<[?],unk> {
|
||||
%float2.300000e00 = torch.constant.float 2.300000e+00
|
||||
%0 = torch.aten.add.Tensor %arg0, %arg1, %float2.300000e00 : !torch.vtensor<[?],f32>, !torch.vtensor<[],f64>, !torch.float -> !torch.vtensor<[?],unk>
|
||||
return %0 : !torch.vtensor<[?],unk>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion$zero_rank_higher_category(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<[?],si64>,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor<[],f32>) -> !torch.vtensor<[?],unk> {
|
||||
// CHECK: %[[ALPHA:.*]] = torch.constant.int 2
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add.Tensor %[[ARG0]], %[[ARG1]], %[[ALPHA]] : !torch.vtensor<[?],si64>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[?],f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[ADD]] : !torch.vtensor<[?],f32> to !torch.vtensor<[?],unk>
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor<[?],unk>
|
||||
func.func @type_promotion$zero_rank_higher_category(%arg0: !torch.vtensor<[?],si64>, %arg1: !torch.vtensor<[],f32>) -> !torch.vtensor<[?],unk> {
|
||||
%int2 = torch.constant.int 2
|
||||
%0 = torch.aten.add.Tensor %arg0, %arg1, %int2 : !torch.vtensor<[?],si64>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[?],unk>
|
||||
return %0 : !torch.vtensor<[?],unk>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion$alpha_wider(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor<[?],f32>,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor<[],f32>) -> !torch.vtensor<[?],unk> {
|
||||
// CHECK: %[[ALPHA:.*]] = torch.constant.float 2.300000e+00
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add.Tensor %[[ARG0]], %[[ARG1]], %[[ALPHA]] : !torch.vtensor<[?],f32>, !torch.vtensor<[],f32>, !torch.float -> !torch.vtensor<[?],f32>
|
||||
// CHECK: %[[RESULT:.*]] = torch.tensor_static_info_cast %[[ADD]] : !torch.vtensor<[?],f32> to !torch.vtensor<[?],unk>
|
||||
// CHECK: return %[[RESULT]] : !torch.vtensor<[?],unk>
|
||||
func.func @type_promotion$alpha_wider(%arg0: !torch.vtensor<[?],f32>, %arg1: !torch.vtensor<[],f32>) -> !torch.vtensor<[?],unk> {
|
||||
%float2.300000e00 = torch.constant.float 2.300000e+00
|
||||
%0 = torch.aten.add.Tensor %arg0, %arg1, %float2.300000e00 : !torch.vtensor<[?],f32>, !torch.vtensor<[],f32>, !torch.float -> !torch.vtensor<[?],unk>
|
||||
return %0 : !torch.vtensor<[?],unk>
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @type_promotion_scalar_operation(
|
||||
// CHECK-SAME: %[[FLOAT:.*]]: !torch.float,
|
||||
// CHECK-SAME: %[[INT:.*]]: !torch.int) -> !torch.number {
|
||||
// CHECK: %[[ADD:.*]] = torch.aten.add %[[FLOAT]], %[[INT]] : !torch.float, !torch.int -> !torch.float
|
||||
// CHECK: %[[RET:.*]] = torch.derefine %[[ADD]] : !torch.float to !torch.number
|
||||
// CHECK: return %[[RET]] : !torch.number
|
||||
func.func @type_promotion_scalar_operation(%float: !torch.float, %int: !torch.int) -> !torch.number {
|
||||
%ret = torch.aten.add %float, %int : !torch.float, !torch.int -> !torch.number
|
||||
return %ret : !torch.number
|
||||
}
|
||||
|
||||
// -----
|
||||
// CHECK-LABEL: func.func @torch.overwrite.tensor.contents$dynamic_overwrites_static(
|
||||
// CHECK-SAME: %[[STATIC:.*]]: !torch.vtensor<[2],f32>,
|
||||
// CHECK-SAME: %[[DYNAMIC:.*]]: !torch.vtensor<[?],f32>) -> !torch.vtensor<[2],f32> {
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
// RUN: torch-mlir-opt -torch-reify-dtype-calculations -split-input-file %s | FileCheck %s
|
||||
|
||||
// CHECK: module {
|
||||
// CHECK: func.func private @__torch_mlir_dtype_fn.aten.tanh(
|
||||
|
||||
// CHECK-LABEL: func.func @basic(
|
||||
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor) -> !torch.vtensor {
|
||||
// CHECK: %[[RESULT:.*]] = torch.dtype.calculate {
|
||||
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG]] : !torch.vtensor -> !torch.vtensor
|
||||
// CHECK: torch.dtype.calculate.yield %[[TANH]] : !torch.vtensor
|
||||
// CHECK: } dtypes {
|
||||
// CHECK: %[[SIZE:.*]] = torch.aten.size %[[ARG]] : !torch.vtensor -> !torch.list<int>
|
||||
// CHECK: %[[RANK:.*]] = torch.aten.len.t %[[SIZE]] : !torch.list<int> -> !torch.int
|
||||
// CHECK: %[[DTYPE:.*]] = torch.prim.dtype %[[ARG]] : !torch.vtensor -> !torch.int
|
||||
// CHECK: %[[RESULT_DTYPE:.*]] = func.call @__torch_mlir_dtype_fn.aten.tanh(%[[RANK]], %[[DTYPE]]) : (!torch.int, !torch.int) -> !torch.int
|
||||
// CHECK: torch.dtype.calculate.yield.dtypes %[[RESULT_DTYPE]] : !torch.int
|
||||
// CHECK: } : !torch.vtensor
|
||||
// CHECK: return %[[RESULT:.*]] : !torch.vtensor
|
||||
func.func @basic(%arg0: !torch.vtensor) -> !torch.vtensor {
|
||||
%0 = torch.aten.tanh %arg0 : !torch.vtensor -> !torch.vtensor
|
||||
return %0 : !torch.vtensor
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func private @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes(
|
||||
// CHECK: {{.*}} = torch.promote_dtypes {{.*}} : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
|
||||
// CHECK-LABEL: func.func private @__torch_mlir_dtype_fn.aten.floor_divide(
|
||||
// CHECK: {{.*}} = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.library_generator.promote_dtypes({{.*}}
|
||||
|
||||
// CHECK-LABEL: func.func @op_with_dtype_promotion(
|
||||
// CHECK: {{.*}} = func.call @__torch_mlir_dtype_fn.aten.floor_divide({{.*}}
|
||||
func.func @op_with_dtype_promotion(%arg0: !torch.vtensor, %arg1: !torch.vtensor) -> !torch.vtensor {
|
||||
%0 = torch.aten.floor_divide %arg0, %arg1 : !torch.vtensor, !torch.vtensor -> !torch.vtensor
|
||||
return %0 : !torch.vtensor
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func private @__torch_mlir_dtype_fn.aten.floor_divide(
|
||||
|
||||
// CHECK-LABEL: func.func @turn_tensors_into_rank_and_dtype_args(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: !torch.vtensor,
|
||||
// CHECK-SAME: %[[ARG1:.*]]: !torch.vtensor) -> !torch.vtensor {
|
||||
// CHECK: %[[SIZE0:.*]] = torch.aten.size %[[ARG0]] : !torch.vtensor -> !torch.list<int>
|
||||
// CHECK: %[[RANK0:.*]] = torch.aten.len.t %[[SIZE0]] : !torch.list<int> -> !torch.int
|
||||
// CHECK: %[[DTYPE0:.*]] = torch.prim.dtype %[[ARG0]] : !torch.vtensor -> !torch.int
|
||||
// CHECK: %[[SIZE1:.*]] = torch.aten.size %[[ARG1]] : !torch.vtensor -> !torch.list<int>
|
||||
// CHECK: %[[RANK1:.*]] = torch.aten.len.t %[[SIZE1]] : !torch.list<int> -> !torch.int
|
||||
// CHECK: %[[DTYPE1:.*]] = torch.prim.dtype %[[ARG1]] : !torch.vtensor -> !torch.int
|
||||
// CHECK: {{.*}} = func.call @__torch_mlir_dtype_fn.aten.floor_divide(%[[RANK0]], %[[DTYPE0]], %[[RANK1]], %[[DTYPE1]]) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.int
|
||||
func.func @turn_tensors_into_rank_and_dtype_args(%arg0: !torch.vtensor, %arg1: !torch.vtensor) -> !torch.vtensor {
|
||||
%0 = torch.aten.floor_divide %arg0, %arg1 : !torch.vtensor, !torch.vtensor -> !torch.vtensor
|
||||
return %0 : !torch.vtensor
|
||||
}
|
|
@ -0,0 +1,285 @@
|
|||
// RUN: torch-mlir-opt -torch-simplify-dtype-calculations -split-input-file %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func.func @basic(
|
||||
// CHECK-SAME: %[[ARG:.*]]: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
// CHECK: %[[DTYPE_INT:.*]] = torch.constant.int 6
|
||||
// CHECK: %[[RESULT:.*]] = torch.dtype.calculate {
|
||||
// CHECK: %[[TANH:.*]] = torch.aten.tanh %[[ARG]] : !torch.vtensor<*,f32> -> !torch.vtensor<*,f32>
|
||||
// CHECK: torch.dtype.calculate.yield %[[TANH]] : !torch.vtensor<*,f32>
|
||||
// CHECK: } dtypes {
|
||||
// CHECK: torch.dtype.calculate.yield.dtypes %[[DTYPE_INT]] : !torch.int
|
||||
// CHECK: } : !torch.vtensor<*,f32>
|
||||
// CHECK: %[[CAST:.*]] = torch.tensor_static_info_cast %[[RESULT]] : !torch.vtensor<*,f32> to !torch.vtensor
|
||||
// CHECK: return %[[CAST]] : !torch.vtensor
|
||||
func.func @basic(%arg0: !torch.vtensor<*,f32>) -> !torch.vtensor {
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.tanh %arg0 : !torch.vtensor<*,f32> -> !torch.vtensor
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor
|
||||
} dtypes {
|
||||
%2 = torch.prim.dtype %arg0 : !torch.vtensor<*,f32> -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %2 : !torch.int
|
||||
} : !torch.vtensor
|
||||
return %0 : !torch.vtensor
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_tensor_same_category_different_width(
|
||||
// CHECK: {{.*}} = torch.aten.add.Tensor {{.*}} -> !torch.vtensor<[1],f64>
|
||||
func.func @promote_dtypes$tensor_tensor_same_category_different_width(%arg0: !torch.vtensor<[1],f32>, %arg1: !torch.vtensor<[1],f64>, %arg2: !torch.float) {
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Tensor %arg0, %arg1, %arg2 : !torch.vtensor<[1],f32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%ranks = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<optional<int>>
|
||||
%f32_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],f32> -> !torch.int
|
||||
%f64_dtype = torch.prim.dtype %arg1 : !torch.vtensor<[1],f64> -> !torch.int
|
||||
%dtypes = torch.prim.ListConstruct %f32_dtype, %f64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%3 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %3 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_tensor_different_category(
|
||||
// CHECK: {{.*}} = torch.aten.add.Tensor {{.*}} -> !torch.vtensor<[1],f64>
|
||||
func.func @promote_dtypes$tensor_tensor_different_category(%arg0: !torch.vtensor<[1],si32>, %arg1: !torch.vtensor<[1],f64>, %arg2: !torch.float) {
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Tensor %arg0, %arg1, %arg2 : !torch.vtensor<[1],si32>, !torch.vtensor<[1],f64>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%si32_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],si32> -> !torch.int
|
||||
%f64_dtype = torch.prim.dtype %arg1 : !torch.vtensor<[1],f64> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<optional<int>>
|
||||
%dtypes = torch.prim.ListConstruct %si32_dtype, %f64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%3 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %3 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_tensor_same_category_zero_rank_wider(
|
||||
// CHECK: {{.*}} = torch.aten.add.Tensor {{.*}} -> !torch.vtensor<[1],f32>
|
||||
func.func @promote_dtypes$tensor_tensor_same_category_zero_rank_wider(%arg0: !torch.vtensor<[1],f32>, %arg1: !torch.vtensor<[],f64>, %arg2: !torch.int) {
|
||||
%int0 = torch.constant.int 0
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Tensor %arg0, %arg1, %arg2 : !torch.vtensor<[1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%f32_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],f32> -> !torch.int
|
||||
%f64_dtype = torch.prim.dtype %arg1 : !torch.vtensor<[],f64> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %int0 : (!torch.int, !torch.int) -> !torch.list<optional<int>>
|
||||
%dtypes = torch.prim.ListConstruct %f32_dtype, %f64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%3 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %3 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_tensor_zero_rank_higher_category(
|
||||
// CHECK: {{.*}} = torch.aten.add.Tensor {{.*}} -> !torch.vtensor<[1],f32>
|
||||
func.func @promote_dtypes$tensor_tensor_zero_rank_higher_category(%arg0: !torch.vtensor<[1],si64>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.int) {
|
||||
%int0 = torch.constant.int 0
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Tensor %arg0, %arg1, %arg2 : !torch.vtensor<[1],si64>, !torch.vtensor<[],f32>, !torch.int -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%si64_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],si64> -> !torch.int
|
||||
%f32_dtype = torch.prim.dtype %arg1 : !torch.vtensor<[],f32> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %int0 : (!torch.int, !torch.int) -> !torch.list<optional<int>>
|
||||
%dtypes = torch.prim.ListConstruct %si64_dtype, %f32_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%3 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %3 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_tensor_alpha_wider_no_contribution(
|
||||
// CHECK: {{.*}} = torch.aten.add.Tensor {{.*}} -> !torch.vtensor<[1],f32>
|
||||
func.func @promote_dtypes$tensor_tensor_alpha_wider_no_contribution(%arg0: !torch.vtensor<[1],f32>, %arg1: !torch.vtensor<[1],f32>, %arg2: !torch.float) {
|
||||
%int1 = torch.constant.int 1
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Tensor %arg0, %arg1, %arg2 : !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.float -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%f32_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],f32> -> !torch.int
|
||||
%alpha_as_tensor = torch.prim.NumToTensor.Scalar %arg2 : !torch.float -> !torch.tensor<[],f64>
|
||||
%f64_dtype = torch.prim.dtype %alpha_as_tensor : !torch.tensor<[],f64> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %int1, %none : (!torch.int, !torch.int, !torch.none) -> !torch.list<optional<int>>
|
||||
%dtypes = torch.prim.ListConstruct %f32_dtype, %f32_dtype, %f64_dtype : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
|
||||
%3 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %3 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_scalar_scalar_higher_category(
|
||||
// CHECK: {{.*}} = torch.aten.add.Scalar {{.*}} -> !torch.vtensor<[1],f32>
|
||||
func.func @promote_dtypes$tensor_scalar_scalar_higher_category(%arg0: !torch.vtensor<[1],si64>, %arg1: !torch.float, %arg2: !torch.int) {
|
||||
%none = torch.constant.none
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Scalar %arg0, %arg1, %arg2 : !torch.vtensor<[1],si64>, !torch.float, !torch.int -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%si64_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],si64> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %none : (!torch.int, !torch.none) -> !torch.list<optional<int>>
|
||||
%arg1_as_tensor = torch.prim.NumToTensor.Scalar %arg1 : !torch.float -> !torch.tensor<[],f64>
|
||||
%f64_dtype = torch.prim.dtype %arg1_as_tensor : !torch.tensor<[],f64> -> !torch.int
|
||||
%dtypes = torch.prim.ListConstruct %si64_dtype, %f64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%5 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %5 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$tensor_scalar_scalar_same_category_wider(
|
||||
// CHECK: {{.*}} = torch.aten.add.Scalar {{.*}} -> !torch.vtensor<[1],si32>
|
||||
func.func @promote_dtypes$tensor_scalar_scalar_same_category_wider(%arg0: !torch.vtensor<[1],si32>, %arg1: !torch.int, %arg2: !torch.int) {
|
||||
%none = torch.constant.none
|
||||
%int3 = torch.constant.int 3
|
||||
%int1 = torch.constant.int 1
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add.Scalar %arg0, %arg1, %arg2 : !torch.vtensor<[1],si32>, !torch.int, !torch.int -> !torch.vtensor<[1],unk>
|
||||
torch.dtype.calculate.yield %1 : !torch.vtensor<[1],unk>
|
||||
} dtypes {
|
||||
%si32_dtype = torch.prim.dtype %arg0 : !torch.vtensor<[1],si32> -> !torch.int
|
||||
%ranks = torch.prim.ListConstruct %int1, %none : (!torch.int, !torch.none) -> !torch.list<optional<int>>
|
||||
%arg1_as_tensor = torch.prim.NumToTensor.Scalar %arg1 : !torch.int -> !torch.tensor<[],si64>
|
||||
%si64_dtype = torch.prim.dtype %arg1_as_tensor : !torch.tensor<[],si64> -> !torch.int
|
||||
%dtypes = torch.prim.ListConstruct %si32_dtype, %si64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%5 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %5 : !torch.int
|
||||
} : !torch.vtensor<[1],unk>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$scalar_scalar_different_category(
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.float
|
||||
func.func @promote_dtypes$scalar_scalar_different_category(%arg0: !torch.float, %arg1: !torch.int) -> !torch.number {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.float, !torch.int -> !torch.number
|
||||
torch.dtype.calculate.yield %1 : !torch.number
|
||||
} dtypes {
|
||||
%ranks = torch.prim.ListConstruct %none, %none : (!torch.none, !torch.none) -> !torch.list<optional<int>>
|
||||
%arg0_as_tensor = torch.prim.NumToTensor.Scalar %arg0 : !torch.float -> !torch.tensor<[],f64>
|
||||
%f64_dtype = torch.prim.dtype %arg0_as_tensor : !torch.tensor<[],f64> -> !torch.int
|
||||
%arg1_as_tensor = torch.prim.NumToTensor.Scalar %arg1 : !torch.int -> !torch.tensor<[],si64>
|
||||
%si64_dtype = torch.prim.dtype %arg1_as_tensor : !torch.tensor<[],si64> -> !torch.int
|
||||
%dtypes = torch.prim.ListConstruct %f64_dtype, %si64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%7 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %7 : !torch.int
|
||||
} : !torch.number
|
||||
return %0 : !torch.number
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @promote_dtypes$scalar_scalar_same_category(
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.int
|
||||
func.func @promote_dtypes$scalar_scalar_same_category(%arg0: !torch.int, %arg1: !torch.int) -> !torch.number {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.int, !torch.int -> !torch.number
|
||||
torch.dtype.calculate.yield %1 : !torch.number
|
||||
} dtypes {
|
||||
%ranks = torch.prim.ListConstruct %none, %none : (!torch.none, !torch.none) -> !torch.list<optional<int>>
|
||||
%arg0_as_tensor = torch.prim.NumToTensor.Scalar %arg0 : !torch.int -> !torch.tensor<[],si64>
|
||||
%si64_dtype = torch.prim.dtype %arg0_as_tensor : !torch.tensor<[],si64> -> !torch.int
|
||||
%dtypes = torch.prim.ListConstruct %si64_dtype, %si64_dtype : (!torch.int, !torch.int) -> !torch.list<int>
|
||||
%7 = torch.promote_dtypes %ranks, %dtypes : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int
|
||||
torch.dtype.calculate.yield.dtypes %7 : !torch.int
|
||||
} : !torch.number
|
||||
return %0 : !torch.number
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @refine_dtype$invalid_dtype_for_scalar(
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.number
|
||||
func.func @refine_dtype$invalid_dtype_for_scalar(%arg0: !torch.int, %arg1: !torch.int) -> !torch.number {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.int, !torch.int -> !torch.number
|
||||
torch.dtype.calculate.yield %1 : !torch.number
|
||||
} dtypes {
|
||||
// dtype int for int32
|
||||
%int3 = torch.constant.int 3
|
||||
torch.dtype.calculate.yield.dtypes %int3 : !torch.int
|
||||
} : !torch.number
|
||||
return %0 : !torch.number
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @refine_dtype$no_simplification
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.number
|
||||
func.func @refine_dtype$no_simplification(%arg0: !torch.int, %arg1: !torch.int, %dtype: !torch.int) -> !torch.number {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.int, !torch.int -> !torch.number
|
||||
torch.dtype.calculate.yield %1 : !torch.number
|
||||
} dtypes {
|
||||
torch.dtype.calculate.yield.dtypes %dtype : !torch.int
|
||||
} : !torch.number
|
||||
return %0 : !torch.number
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// If result type is already refined (even if wrong, as is the case here),
|
||||
// don't make any changes to result type.
|
||||
// TODO: This case should result in an error
|
||||
// CHECK-LABEL: func.func @refine_dtype$result_type_already_refined
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.int
|
||||
func.func @refine_dtype$result_type_already_refined(%arg0: !torch.float, %arg1: !torch.float) -> !torch.int {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.float, !torch.float -> !torch.int
|
||||
torch.dtype.calculate.yield %1 : !torch.int
|
||||
} dtypes {
|
||||
// dtype int for float64
|
||||
%int7 = torch.constant.int 7
|
||||
torch.dtype.calculate.yield.dtypes %int7 : !torch.int
|
||||
} : !torch.int
|
||||
return %0 : !torch.int
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @refine_dtype$derefine_result_type(
|
||||
// CHECK: {{.*}} = torch.aten.add {{.*}} -> !torch.int
|
||||
// CHECK: %[[ERASED:.*]] = torch.derefine {{.*}} : !torch.int to !torch.number
|
||||
// CHECK: return %[[ERASED]] : !torch.number
|
||||
func.func @refine_dtype$derefine_result_type(%arg0: !torch.int, %arg1: !torch.int) -> !torch.number {
|
||||
%none = torch.constant.none
|
||||
%0 = torch.dtype.calculate {
|
||||
%1 = torch.aten.add %arg0, %arg1 : !torch.int, !torch.int -> !torch.number
|
||||
torch.dtype.calculate.yield %1 : !torch.number
|
||||
} dtypes {
|
||||
// dtype int for int64
|
||||
%int4 = torch.constant.int 4
|
||||
torch.dtype.calculate.yield.dtypes %int4 : !torch.int
|
||||
} : !torch.number
|
||||
return %0 : !torch.number
|
||||
}
|