torch-mlir/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp


//===------------------------------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//
#include "torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h"
#include "torch-mlir/Conversion/TorchOnnxToTorch/Utils.h"
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::onnx_c;
// Simple rewrites for the default domain.
// See: https://onnx.ai/onnx/operators/
// For operators that are effectively version invariant, we register with
// sinceVersion==1. We interpret this to include the following spec
// diffs that are irrelevant to this level of lowering:
// * Supported element types.
// * Limited broadcasting to full broadcasting support.
//
// Many spec revisions simply generalized elementwise ops (for example, from
// limited to full broadcasting) so that they become a direct translation
// rather than a special case. This results in a lot of ONNX test cases that
// all reduce to the exact same thing here, so we simplify.
// utilities
namespace {
// Shared lowering for ReduceSum and for reductions built on top of it (e.g.
// ReduceL1), where the sum is not the first operation performed on the data.
// In that case the original operand is provided through storeResult: it is
// what the op is replaced with in the noop_with_empty_axes case, and when
// isIntermediateOp is set the newly created sum is written back into
// storeResult so the caller can keep operating on it.
LogicalResult reducedSumImpl(OpBinder binder,
ConversionPatternRewriter &rewriter, Value data,
Torch::ValueTensorType resultType,
Value &storeResult, int64_t keepDims,
int64_t noop_with_empty_axes,
bool isIntermediateOp) {
SmallVector<Value> axesList;
Value axesVal;
if (!binder.tensorOperandAtIndex(axesVal, 1)) {
auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
if (!inputType.hasSizes() || !resultType.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: expected input and result to have shapes");
}
if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
SmallVector<int64_t> inputShape{inputType.getSizes()};
SmallVector<int64_t> resultShape{resultType.getSizes()};
// if the shapes are equal, none of the dims is reduced
if (llvm::equal(inputShape, resultShape)) {
// simply fill in the op and return
rewriter.replaceOp(binder.op, data);
return success();
}
if (areAllElementsDistinct(inputShape)) {
// The check for the input shape elements to be distinct is added
// for the cases like:
// Input: [3, 2, 2] -> Output: [3, 2]
// For the above case, from the input and output shape it can't be
// inferred whether the dim:1 is reduced or dim:2. To avoid these
// type of cases, the check has been placed.
SmallVector<int64_t> reduceDims;
unsigned resultShapeCounter = 0;
for (unsigned i = 0; i < inputShape.size(); i++) {
if (resultShapeCounter < resultShape.size() &&
inputShape[i] == resultShape[resultShapeCounter]) {
resultShapeCounter++;
} else {
reduceDims.push_back(i);
if (resultShapeCounter < resultShape.size() &&
resultShape[resultShapeCounter] == 1)
resultShapeCounter++;
}
}
for (auto i : reduceDims) {
axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i)));
}
}
}
if (axesList.empty()) {
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axesVal.getType());
auto axesTy = dyn_cast<Torch::ValueTensorType>(axesVal.getType());
auto axesShape = axesTy.getSizes();
if (axesShape.size() != 1 || axesShape[0] == Torch::kUnknownSize)
return failure();
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(0));
SmallVector<int64_t> selectSizes{1};
auto selType = rewriter.getType<Torch::ValueTensorType>(
selectSizes, axesType.getOptionalDtype());
int64_t numAxes = axesShape[0];
for (int64_t i = 0; i < numAxes; ++i) {
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selType, axesVal, zero, iv);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
axesList.push_back(dim);
}
}
}
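// Older opsets pass the reduction axes as an "axes" attribute rather than as
// a second input, so also check for the attribute form here.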
SmallVector<int64_t> axesInts;
if (!binder.s64IntegerArrayAttr(axesInts, "axes", {})) {
for (int64_t i = 0, s = axesInts.size(); i < s; ++i) {
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(axesInts[i]));
axesList.push_back(iv);
}
}
// Handle the noop_with_empty_axes case by replacing the op with the original
// operand (storeResult), not the possibly pre-transformed data (e.g. the abs
// taken for ReduceL1).
if (axesList.empty() && noop_with_empty_axes) {
rewriter.replaceOp(binder.op, storeResult);
return success();
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
axesList);
Value keepDimBool =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
Value dType = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
// If we are using the ReduceSum as an intermediate op to be passed into
// another operation, we don't want to replace the original op. Instead we
// create a new op and store its result in storeResult.
if (!isIntermediateOp) {
rewriter.replaceOpWithNewOp<Torch::AtenSumDimIntListOp>(
binder.op, resultType, data, dimValueList, keepDimBool,
/*dtype=*/dType);
} else {
storeResult = rewriter.create<Torch::AtenSumDimIntListOp>(
binder.getLoc(), resultType, data, dimValueList, keepDimBool,
/*dtype=*/dType);
}
return success();
}
Value getValueList(OpBinder binder, ConversionPatternRewriter &rewriter,
Value operand) {
SmallVector<Value> itemList;
auto sizes = dyn_cast<Torch::ValueTensorType>(operand.getType()).getSizes();
Torch::BaseTensorType operandType =
cast<Torch::BaseTensorType>(operand.getType());
SmallVector<int64_t> selectSizes;
selectSizes.push_back(1);
Type selectResultType = operandType.getWithSizesAndDtype(
llvm::ArrayRef(selectSizes), operandType.getOptionalDtype());
auto extract = [&rewriter, &binder](Value x, Value v) {
auto xTy = cast<Torch::ValueTensorType>(x.getType());
Type extractTy = rewriter.getType<Torch::FloatType>();
if (isa<IntegerType>(xTy.getDtype()))
extractTy = rewriter.getType<Torch::IntType>();
return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy, v);
};
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
MLIRContext *context = binder.op->getContext();
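// Note: extraction deliberately starts at index 2, skipping the first two
// entries of the operand (for callers such as the Resize lowering these are
// the batch and channel entries, which are left untouched).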
for (int i = 2; i < sizes[0]; i++) {
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value ext = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, operand, zero, selectIndex);
Value item = extract(operand, ext);
itemList.push_back(item);
}
auto xTy = cast<Torch::ValueTensorType>(operand.getType());
Value ValueList;
if (isa<IntegerType>(xTy.getDtype())) {
ValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(Torch::IntType::get(context)),
itemList);
} else {
ValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(Torch::FloatType::get(context)),
itemList);
}
return ValueList;
}
} // namespace
void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
OnnxCustomOpConversionPattern &patterns) {
patterns.onOp(
"QuantizeLinear", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
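// Per-tensor ONNX QuantizeLinear: y = saturate(round(x / y_scale) + y_zero_point).
// For integer result types this lowers to aten.quantize_per_tensor followed
// by aten.int_repr; for float result types (e.g. the float8 variants) it is
// expressed directly as a divide, add, and dtype cast.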
Torch::ValueTensorType resultType;
llvm::SmallVector<Value> operands;
if (binder.tensorOperands(operands, 3) ||
binder.tensorResultType(resultType))
return failure();
auto loc = binder.getLoc();
Value operand = operands[0];
Value scale = operands[1];
Value zeropoint = operands[2];
auto scaleTy = dyn_cast<Torch::ValueTensorType>(scale.getType());
if (!scaleTy || !scaleTy.hasSizes())
return rewriter.notifyMatchFailure(binder.op, "requires known rank");
if (!resultType.hasDtype())
return rewriter.notifyMatchFailure(binder.op,
"requires known result dtype");
auto resultETy = resultType.getDtype();
bool rank0 = scaleTy.getSizes().size() == 0;
bool length1 =
scaleTy.getSizes().size() == 1 && scaleTy.getSizes()[0] == 1;
if (!rank0 && !length1)
return rewriter.notifyMatchFailure(binder.op,
"unimplemented: non-scalar scale");
auto qTensorTy = getQTorchTypeFromTorchIntType(resultType);
if (!qTensorTy) {
return rewriter.notifyMatchFailure(binder.op,
"unsupported result dtype");
}
auto torchqTy = Torch::getScalarTypeForType(qTensorTy.getDtype());
Value tyConst = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64),
static_cast<int64_t>(torchqTy)));
scale = rewriter.create<Torch::AtenItemOp>(
loc, rewriter.getType<Torch::FloatType>(), scale);
bool fpResult = isa<mlir::FloatType>(resultETy);
Type zeropointTy = rewriter.getType<Torch::IntType>();
if (fpResult)
zeropointTy = rewriter.getType<Torch::FloatType>();
zeropoint =
rewriter.create<Torch::AtenItemOp>(loc, zeropointTy, zeropoint);
if (fpResult) {
Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(loc, false);
Value one = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getF64FloatAttr(1.0));
Value div = rewriter.create<Torch::AtenDivScalarOp>(
loc, operand.getType(), operand, scale);
Value add = rewriter.create<Torch::AtenAddScalarOp>(
loc, operand.getType(), div, zeropoint, one);
rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
binder.op, resultType, add, tyConst,
/*non_blocking=*/cstFalse, /*copy=*/cstFalse,
/*memory_format=*/none);
return success();
}
auto quantize = rewriter.create<Torch::AtenQuantizePerTensorOp>(
loc, qTensorTy, operand, scale, zeropoint, tyConst);
rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
quantize);
return success();
});
patterns.onOp(
"QLinearConv", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
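// Lowering strategy: rebuild the quantized input and weight tensors with
// aten._make_per_tensor_quantized_tensor, run the convolution through an
// onnx.Conv torch.operator, wrap the accumulator as a per-tensor quantized
// tensor with scale = aScale * bScale and zero point 0, dequantize to f32,
// then requantize to the requested output scale/zero point and take the
// int_repr.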
Torch::ValueTensorType resultType;
llvm::SmallVector<Value> operands;
if ((binder.tensorOperands(operands, 8) &&
binder.tensorOperands(operands, 9)) ||
binder.tensorResultType(resultType))
return failure();
Value a = operands[0];
Value aScale = operands[1];
Value aZp = operands[2];
Value b = operands[3];
Value bScale = operands[4];
Value bZp = operands[5];
Value cScale = operands[6];
Value cZp = operands[7];
Value c = operands.size() == 9 ? operands[8] : nullptr;
auto check = [](Value v) {
auto vTy = cast<Torch::ValueTensorType>(v.getType());
return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1; });
};
if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
!check(cScale) || !check(cZp))
return rewriter.notifyMatchFailure(
binder.op, "not supported for non per-tensor quantization");
auto extract = [&rewriter, &binder](Value v) {
auto vTy = cast<Torch::ValueTensorType>(v.getType());
Type extractTy = rewriter.getType<Torch::FloatType>();
if (isa<IntegerType>(vTy.getDtype()))
extractTy = rewriter.getType<Torch::IntType>();
return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy,
v);
};
aZp = extract(aZp);
bZp = extract(bZp);
cZp = extract(cZp);
aScale = extract(aScale);
bScale = extract(bScale);
cScale = extract(cScale);
auto make = [&rewriter, &binder](Value v, Value scale,
Value zp) -> Value {
auto ty = cast<Torch::ValueTensorType>(v.getType());
auto newTy = getQTorchTypeFromTorchIntType(ty);
return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
binder.getLoc(), newTy, v, scale, zp);
};
a = make(a, aScale, aZp);
b = make(b, bScale, bZp);
auto cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(),
rewriter.getIntegerType(32, /*isSigned=*/true));
// TODO(suderman): insert convolution operator.
llvm::SmallVector<Value> newOperands = {a, b};
if (c)
newOperands.push_back(c);
cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(),
rewriter.getType<Torch::QInt32Type>());
llvm::SmallVector<NamedAttribute> newAttributes;
newAttributes.push_back(
rewriter.getNamedAttr("name", rewriter.getStringAttr("onnx.Conv")));
for (auto namedAttr : binder.op->getAttrDictionary()) {
if (namedAttr.getName().getValue().compare("name") == 0)
continue;
newAttributes.push_back(namedAttr);
}
c = rewriter
.create<Torch::OperatorOp>(binder.getLoc(), cTy, newOperands,
newAttributes,
binder.op->getRegions().size())
.getResult(0);
Value outScale = rewriter.create<Torch::AtenMulFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(), aScale,
bScale);
Value outZp = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
c = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
binder.getLoc(), cTy, c, outScale, outZp);
cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(), rewriter.getF32Type());
c = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(), cTy,
c);
cTy = getQTorchTypeFromTorchIntType(resultType);
Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(
rewriter.getIntegerType(64),
static_cast<int64_t>(
Torch::getScalarTypeForType(cTy.getDtype()))));
c = rewriter.create<Torch::AtenQuantizePerTensorOp>(
binder.getLoc(), cTy, c, cScale, cZp, dtyVal);
rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
c);
return success();
});
patterns.onOp(
"QLinearMatMul", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
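// Same per-tensor strategy as QLinearConv: requantize the inputs, matmul in a
// wider accumulator (aten.mm for rank-2, aten.bmm otherwise), wrap the result
// as qint32 with scale = aScale * bScale and zero point 0, dequantize to f32,
// and requantize to the output scale/zero point.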
Torch::ValueTensorType resultType;
llvm::SmallVector<Value> operands;
if (binder.tensorOperands(operands, 8) ||
binder.tensorResultType(resultType))
return failure();
Value a = operands[0];
Value aScale = operands[1];
Value aZp = operands[2];
Value b = operands[3];
Value bScale = operands[4];
Value bZp = operands[5];
Value cScale = operands[6];
Value cZp = operands[7];
auto check = [](Value v) {
auto vTy = cast<Torch::ValueTensorType>(v.getType());
for (auto dim : vTy.getSizes())
if (dim != 1)
return false;
return true;
};
if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
!check(cScale) || !check(cZp))
return rewriter.notifyMatchFailure(
binder.op, "not supported for non per-tensor quantization");
Value emptyList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>()),
ValueRange{});
auto extract = [&rewriter, &binder, &emptyList](Value v) {
auto vTy = cast<Torch::ValueTensorType>(v.getType());
if (!vTy.getSizes().empty()) {
vTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>({}), vTy.getOptionalDtype());
v = rewriter.create<Torch::AtenReshapeOp>(binder.getLoc(), vTy, v,
emptyList);
}
Type extractTy = rewriter.getType<Torch::FloatType>();
if (isa<IntegerType>(vTy.getDtype()))
extractTy = rewriter.getType<Torch::IntType>();
return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy,
v);
};
aZp = extract(aZp);
bZp = extract(bZp);
cZp = extract(cZp);
aScale = extract(aScale);
bScale = extract(bScale);
cScale = extract(cScale);
auto make = [&rewriter, &binder](Value v, Value scale,
Value zp) -> Value {
auto ty = cast<Torch::ValueTensorType>(v.getType());
auto newTy = getQTorchTypeFromTorchIntType(ty);
return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
binder.getLoc(), newTy, v, scale, zp);
};
a = make(a, aScale, aZp);
b = make(b, bScale, bZp);
auto cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(),
rewriter.getIntegerType(32, /*isSigned=*/true));
Value c;
if (cTy.getSizes().size() == 2) {
c = rewriter.create<Torch::AtenMmOp>(binder.getLoc(), cTy, a, b);
} else {
c = rewriter.create<Torch::AtenBmmOp>(binder.getLoc(), cTy, a, b);
}
cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(),
rewriter.getType<Torch::QInt32Type>());
Value mmScale = rewriter.create<Torch::AtenMulFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(), aScale,
bScale);
Value mmZp = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
c = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
binder.getLoc(), cTy, c, mmScale, mmZp);
cTy = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(), rewriter.getF32Type());
c = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(), cTy,
c);
cTy = dyn_cast<Torch::ValueTensorType>(
getQTorchTypeFromTorchIntType(resultType));
Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(
rewriter.getIntegerType(64),
static_cast<int64_t>(
Torch::getScalarTypeForType(cTy.getDtype()))));
c = rewriter.create<Torch::AtenQuantizePerTensorOp>(
binder.getLoc(), cTy, c, cScale, cZp, dtyVal);
rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
c);
return success();
});
patterns.onOp("Reciprocal", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenReciprocalOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp(
"Relu", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value x;
if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenReluOp>(binder.op, resultType,
x);
return success();
});
patterns.onOp("Round", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenRoundOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp("RNN", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
return OnnxRnnExpander(binder, rewriter);
});
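// Scatter is the deprecated (opset 11) predecessor of ScatterElements; it has
// no reduction attribute and maps directly onto aten.scatter.src.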
patterns.onOp(
"Scatter", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
int64_t axis;
if (binder.s64IntegerAttr(axis, "axis", {}))
return rewriter.notifyMatchFailure(binder.op, "axis bind failure");
Torch::ValueTensorType resultTy;
Value data, indices, updates;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorOperandAtIndex(indices, 1) ||
binder.tensorOperandAtIndex(updates, 2) ||
binder.tensorResultType(resultTy))
return failure();
auto dataTy = cast<Torch::ValueTensorType>(data.getType()),
indicesTy = cast<Torch::ValueTensorType>(indices.getType()),
updatesTy = cast<Torch::ValueTensorType>(updates.getType());
int64_t dataRank = dataTy.getSizes().size(),
indicesRank = indicesTy.getSizes().size(),
updatesRank = updatesTy.getSizes().size();
if ((dataRank < 1) || (indicesRank < 1) || (updatesRank < 1) ||
(axis < -dataRank) || (axis >= dataRank))
return failure();
Value axisValue = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(axis));
rewriter.replaceOpWithNewOp<Torch::AtenScatterSrcOp>(
binder.op, resultTy, data, axisValue, indices, updates);
return success();
});
patterns.onOp(
"ScatterElements", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
SmallVector<Value> valList;
int64_t axis;
std::string reduction;
int64_t numOperands = binder.op->getNumOperands();
if (binder.tensorOperands(valList, numOperands) ||
binder.s64IntegerAttr(axis, "axis", 0) ||
binder.customOpNameStringAttr(reduction, "reduction", "none") ||
binder.tensorResultType(resultType))
return failure();
auto loc = binder.getLoc();
Value data = valList[0];
Value indices = valList[1];
Value updates = valList[2];
// ONNX allows negative axis.
if (axis < 0)
axis +=
cast<Torch::ValueTensorType>(data.getType()).getSizes().size();
Value constAxis = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), axis));
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(0));
Value one = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(1));
Value axisSize = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), data,
constAxis);
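// ONNX also allows negative indices: wrap any index < 0 into range by adding
// the size of the scatter axis.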
auto indicesTy = cast<Torch::ValueTensorType>(indices.getType());
Value indicesAdd = rewriter.create<Torch::AtenAddScalarOp>(
loc, indicesTy, indices, axisSize, one);
Value inputNeg = rewriter.create<Torch::AtenLtScalarOp>(
loc,
rewriter.getType<Torch::ValueTensorType>(indicesTy.getSizes(),
rewriter.getI1Type()),
indices, zero);
indices = rewriter.create<Torch::AtenWhereSelfOp>(
loc, indicesTy, inputNeg, indicesAdd, indices);
if (reduction == "none") {
rewriter.replaceOpWithNewOp<Torch::AtenScatterSrcOp>(
binder.op, resultType, data, constAxis, indices, updates);
return success();
}
// TODO: Implement max and min cases
if (reduction == "mul") {
reduction = "multiply";
} else if (reduction == "max" || reduction == "min") {
return rewriter.notifyMatchFailure(
binder.op, "max/min reduction unsupported for scatter elements");
}
Value cstStrReduction =
rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), reduction);
rewriter.replaceOpWithNewOp<Torch::AtenScatterReduceOp>(
binder.op, resultType, data, constAxis, indices, updates,
cstStrReduction);
return success();
});
patterns.onOp(
"SequenceConstruct", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
SmallVector<Value> operands;
Torch::ListType resultType;
if (binder.tensorOperands(operands, binder.getNumOperands()) ||
binder.tensorListResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::PrimListConstructOp>(
binder.op, resultType, operands);
return success();
});
patterns.onOp(
"SequenceLength", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
// onnx.SequenceLength takes a sequence (list) of tensors and returns a
// rank-0 tensor holding its length.
Torch::ValueTensorType resultType;
Value x;
if (binder.tensorListOperand(x) || binder.tensorResultType(resultType))
return failure();
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value len = rewriter.create<Torch::AtenLenTOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), x);
// AtenLenTOp returns a torch.int, so we have to
// put that in a tensor.
rewriter.replaceOpWithNewOp<Torch::AtenTensorIntOp>(
binder.op, resultType, len, none, none, cstFalse);
return success();
});
patterns.onOp(
"Sigmoid", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value x;
if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenSigmoidOp>(binder.op, resultType,
x);
return success();
});
patterns.onOp("Sin", 7,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenSinOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp("Tanh", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenTanhOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp("Sqrt", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenSqrtOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp(
"Sub", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value x;
Value y;
if (binder.tensorOperands(x, y) || binder.tensorResultType(resultType))
return failure();
Value const1 = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 1));
rewriter.replaceOpWithNewOp<Torch::AtenSubTensorOp>(
binder.op, resultType, x, y, const1);
return success();
});
patterns.onOp(
"Sum", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
if (binder.op->getNumOperands() == 1) {
Torch::ValueTensorType resultType;
Value x;
if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
return failure();
rewriter.replaceOp(binder.op, x);
return success();
}
Torch::ValueTensorType resultType;
SmallVector<Value> valList;
int64_t numOperands = binder.op->getNumOperands();
if (binder.tensorOperands(valList, numOperands) ||
binder.tensorResultType(resultType))
return failure();
Value const1 = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 1));
// Short circuit to binary add
if (numOperands == 2) {
rewriter.replaceOpWithNewOp<Torch::AtenAddTensorOp>(
binder.op, resultType, valList[0], valList[1], const1);
return success();
}
// When binder.op->getNumOperands() > 2
Value curr = rewriter.create<Torch::AtenAddTensorOp>(
binder.getLoc(), resultType, valList[0], valList[1], const1);
for (int i = 2; i < numOperands; i++) {
if (i == numOperands - 1) {
curr = rewriter.create<Torch::AtenAddTensorOp>(
binder.getLoc(), resultType, curr, valList[i], const1);
} else {
SmallVector<int64_t> resultBroadcastShapeInt;
SmallVector<Value> resultBroadcastShapeValue;
Torch::computeBroadcastShape(rewriter, binder.getLoc(), curr,
valList[i], resultBroadcastShapeInt,
resultBroadcastShapeValue);
auto baseType = Torch::ValueTensorType::get(
binder.op->getContext(), resultBroadcastShapeInt,
resultType.getOptionalDtype());
curr = rewriter.create<Torch::AtenAddTensorOp>(
binder.getLoc(), baseType, curr, valList[i], const1);
}
}
rewriter.replaceOp(binder.op, curr);
return success();
});
patterns.onOp("Where", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
SmallVector<Value> valList;
int64_t numOperands = binder.op->getNumOperands();
if (binder.tensorOperands(valList, numOperands) ||
binder.tensorResultType(resultType))
return failure();
Value condition = valList[0];
Value x = valList[1];
Value y = valList[2];
rewriter.replaceOpWithNewOp<Torch::AtenWhereSelfOp>(
binder.op, resultType, condition, x, y);
return success();
});
patterns.onOp(
"Xor", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value x;
Value y;
if (binder.tensorOperands(x, y) || binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenLogicalXorOp>(binder.op,
resultType, x, y);
return success();
});
patterns.onOp(
"Squeeze", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
SmallVector<Value> inputOperands;
if (binder.tensorOperands(inputOperands, binder.op->getNumOperands()) ||
binder.tensorResultType(resultType))
return failure();
Value data = inputOperands[0];
auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
if (!inputType.hasSizes() || !resultType.hasSizes())
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: expected input and result to have shapes");
int64_t inputRank = inputType.getSizes().size();
int64_t resultRank = resultType.getSizes().size();
int64_t rankDiff = inputRank - resultRank;
if (rankDiff == 0) {
// In this case, no dimension is squeezed. Hence just replace the op
// with input.
rewriter.replaceOp(binder.op, data);
return success();
}
if (inputOperands.size() == 1) {
// Case: `axes` value is not present which means squeeze all the
// dimensions with shape value 1.
rewriter.replaceOpWithNewOp<Torch::AtenSqueezeOp>(binder.op,
resultType, data);
return success();
}
SmallVector<Value> dimList;
if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
// If the input shape and result shape is statically known then the
// list of dims to be squeezed can be derived from those shapes. As a
// result, we don't have to wait for the dim values to be known at
// runtime which is also expected by the downstream pipeline.
SmallVector<int64_t> inputShape(inputType.getSizes());
SmallVector<int64_t> resultShape(resultType.getSizes());
SmallVector<int64_t> squeezeDims;
unsigned resultShapeCounter = 0;
for (unsigned i = 0; i < inputRank; i++) {
if (resultShapeCounter < resultRank &&
inputShape[i] == resultShape[resultShapeCounter]) {
resultShapeCounter++;
} else {
squeezeDims.push_back(i);
}
}
for (auto i : squeezeDims) {
dimList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i)));
}
}
if (dimList.empty()) {
Value axes = inputOperands[1];
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axes.getType());
SmallVector<int64_t> selectSizes{1};
Type selectResultType = axesType.getWithSizesAndDtype(
selectSizes, axesType.getOptionalDtype());
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
for (int i = 0; i < rankDiff; i++) {
// Go through the axes list and get each dim in the list
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, axes, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
dimList.push_back(dim);
}
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>()),
dimList);
rewriter.replaceOpWithNewOp<Torch::PrimsSqueezeOp>(
binder.op, resultType, data, dimValueList);
return success();
});
patterns.onOp(
"Unsqueeze", 13,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
// Unlike Squeeze, where we are able to lower to Torch::PrimsSqueezeOp,
// pytorch's torch.unsqueeze cannot insert multiple new dims at once.
// Discussion can be found here:
// https://github.com/pytorch/pytorch/issues/9410
// So, for now, we unroll into multiple unsqueezes.
Location loc = binder.getLoc();
Torch::ValueTensorType resultType;
Value data, axes;
if (binder.tensorOperands(data, axes) ||
binder.tensorResultType(resultType))
return failure();
auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
if (!inputType.hasSizes() || !resultType.hasSizes())
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: expected input and result to have shapes");
int64_t inputRank = inputType.getSizes().size();
int64_t resultRank = resultType.getSizes().size();
int64_t rankDiff = resultRank - inputRank;
if (rankDiff == 0) {
// In this case, no dimension is unsqueezed. Hence just replace the op
// with input.
rewriter.replaceOp(binder.op, data);
return success();
}
SmallVector<int64_t> unsqueezeDims;
SmallVector<int64_t> inputShape(inputType.getSizes());
if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
// If the input shape and result shape is statically known then the
// list of dims to be squeezed can be derived from those shapes. As a
// result, we don't have to wait for the dim values to be known at
// runtime which is also expected by the downstream pipeline.
SmallVector<int64_t> resultShape(resultType.getSizes());
unsigned inputShapeCounter = 0;
for (unsigned i = 0; i < resultRank; i++) {
if (inputShapeCounter < inputRank &&
inputShape[inputShapeCounter] == resultShape[i]) {
inputShapeCounter++;
} else {
unsqueezeDims.push_back(i);
}
}
} else {
SmallVector<int64_t> unsqueezeDimsInts;
if (!matchPattern(axes, m_OnnxListOfConstantInts(unsqueezeDimsInts)))
return rewriter.notifyMatchFailure(
binder.op, "only support constant int axes values");
for (auto dim : unsqueezeDimsInts)
unsqueezeDims.push_back(dim < 0 ? dim + resultRank : dim);
// If we don't sort, unsqueezing first on 4 and then on 0 would fail
// for shape = {x,y,z}, and axes [4,0]
llvm::sort(unsqueezeDims.begin(), unsqueezeDims.end());
}
Value result = data;
SmallVector<int64_t> unsqueezeShape = inputShape;
for (auto dim : unsqueezeDims) {
unsqueezeShape.insert(unsqueezeShape.begin() + dim, 1);
Type unsqueezeType = resultType.getWithSizesAndDtype(
unsqueezeShape, resultType.getOptionalDtype());
Value cstDim = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(dim));
result = rewriter.create<Torch::AtenUnsqueezeOp>(loc, unsqueezeType,
result, cstDim);
}
rewriter.replaceOp(binder.op, result);
return success();
});
patterns.onOp(
"Softmax", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
int64_t axis;
if (binder.tensorOperand(input) ||
binder.s64IntegerAttr(axis, "axis", -1) ||
binder.tensorResultType(resultType))
return failure();
// ONNX allows negative axis.
if (axis < 0)
axis +=
cast<Torch::ValueTensorType>(input.getType()).getSizes().size();
Value constAxis = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), axis));
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
rewriter.replaceOpWithNewOp<Torch::AtenSoftmaxIntOp>(
binder.op, resultType, input, constAxis, /*dtype=*/noneVal);
return success();
});
patterns.onOp(
"Selu", 6, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
// y = gamma * (alpha * e^x - alpha) for x <= 0, y = gamma * x for x > 0
Torch::ValueTensorType resultType;
float alpha, gamma;
Value operand;
// Refer https://onnx.ai/onnx/operators/onnx__Selu.html for the default
// alpha and gamma values.
if (binder.tensorOperand(operand) ||
binder.f32FloatAttr(alpha, "alpha", 1.67326) ||
binder.f32FloatAttr(gamma, "gamma", 1.0507) ||
binder.tensorResultType(resultType))
return failure();
Value vAlpha = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), alpha));
Value vScale = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), gamma));
Value vInputScale = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), 1.0));
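// aten.elu(x, alpha, scale, input_scale) computes
// scale * x for x > 0 and scale * alpha * (exp(input_scale * x) - 1)
// otherwise, which matches the Selu definition above with scale = gamma and
// input_scale = 1.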
rewriter.replaceOpWithNewOp<Torch::AtenEluOp>(
binder.op, resultType, operand, vAlpha, vScale, vInputScale);
return success();
});
patterns.onOp("ReduceL1", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
int64_t keepDims, noop_with_empty_axes;
Value operand;
if (binder.tensorOperandAtIndex(operand, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes,
"noop_with_empty_axes", 0))
return failure();
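// ReduceL1(x) == ReduceSum(|x|): take the elementwise abs, then reuse the
// shared ReduceSum lowering. The original operand (not its abs) is passed as
// storeResult so the noop_with_empty_axes case returns the untouched input.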
Value data = rewriter.create<Torch::AtenAbsOp>(
binder.getLoc(), operand.getType(), operand);
return reducedSumImpl(binder, rewriter, data, resultType,
/*storeValue=*/operand, keepDims,
noop_with_empty_axes, false);
});
patterns.onOp(
"ReduceL2", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(operand, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
// A ReduceL2 op is equivalent to the following sequence of operations:
// Mul(x, x) -> ReduceSum -> CastF32 -> Sqrt -> CastLike(resultType)
Value squareOfOperand = rewriter.create<Torch::AtenMulTensorOp>(
binder.getLoc(), operand.getType(), operand, operand);
auto reducedSum =
reducedSumImpl(binder, rewriter, squareOfOperand, resultType,
operand, keepDims, noop_with_empty_axes, true);
if (failed(reducedSum))
return rewriter.notifyMatchFailure(
binder.op,
"Failed to perform sum operation on square of operand");
Value castDType = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(/*Float32Type*/ 6));
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value constFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
// Perform an AtenToDtype op on the squared sum of the operand, stored
// now in operand itself.
auto size = dyn_cast<Torch::ValueTensorType>(operand.getType())
.getOptionalSizes();
auto f32ResultType = rewriter.getType<Torch::ValueTensorType>(
size, rewriter.getF32Type());
Value operandCast = rewriter.create<Torch::AtenToDtypeOp>(
binder.getLoc(), f32ResultType, operand, castDType,
/*non_blocking=*/constFalse, /*copy=*/constFalse,
/*memory_format=*/noneVal);
Value operandSqrt = rewriter.create<Torch::AtenSqrtOp>(
binder.getLoc(), f32ResultType, operandCast);
Value resultDtype = Torch::getDtypeIntValueForType(
rewriter, binder.getLoc(), resultType.getDtype());
rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
binder.op, resultType, operandSqrt, resultDtype,
/*non_blocking=*/constFalse, /*copy=*/constFalse,
/*memory_format=*/noneVal);
return success();
});
patterns.onOp("ReduceLogSum", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes,
"noop_with_empty_axes", 0))
return failure();
auto reducedSumBool =
reducedSumImpl(binder, rewriter, data, resultType,
/*storeValue=*/data, keepDims,
noop_with_empty_axes, true);
if (failed(reducedSumBool))
return rewriter.notifyMatchFailure(
binder.op,
"Failed to perform sum operation on square of operand");
rewriter.replaceOpWithNewOp<Torch::AtenLogOp>(
binder.op, resultType, data);
return success();
});
patterns.onOp(
"ReduceLogSumExp", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
// out = Log(reducesum(exp(data)))
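// The computation is carried out in f64 so that exp() is less likely to
// overflow or lose precision before the reduction; the final log is cast back
// to the requested result dtype.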
Value castDType = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(/*Float64Type*/ 7));
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value constFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
auto size =
dyn_cast<Torch::ValueTensorType>(data.getType()).getOptionalSizes();
auto f64ResultType = rewriter.getType<Torch::ValueTensorType>(
size, rewriter.getF64Type());
Value dataCast = rewriter.create<Torch::AtenToDtypeOp>(
binder.getLoc(), f64ResultType, data, castDType,
/*non_blocking=*/constFalse, /*copy=*/constFalse,
/*memory_format=*/noneVal);
Value dataExp = rewriter.create<Torch::AtenExpOp>(
binder.getLoc(), f64ResultType, dataCast);
auto f64ReduceType = rewriter.getType<Torch::ValueTensorType>(
resultType.getOptionalSizes(), rewriter.getF64Type());
auto reducedSumBool = reducedSumImpl(
binder, rewriter, dataExp, f64ReduceType,
/*storeValue=*/data, keepDims, noop_with_empty_axes, true);
if (failed(reducedSumBool))
return rewriter.notifyMatchFailure(
binder.op,
"Failed to perform sum operation on square of operand");
Value finalResult = rewriter.create<Torch::AtenLogOp>(
binder.getLoc(), f64ReduceType, data);
Value resultDtype = Torch::getDtypeIntValueForType(
rewriter, binder.getLoc(), resultType.getDtype());
rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
binder.op, resultType, finalResult, resultDtype,
/*non_blocking=*/constFalse, /*copy=*/constFalse,
/*memory_format=*/noneVal);
return success();
});
patterns.onOp("ReduceSum", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes,
"noop_with_empty_axes", 0))
return failure();
return reducedSumImpl(binder, rewriter, data, resultType,
/*storeValue=*/data, keepDims,
noop_with_empty_axes, false);
});
patterns.onOp("ReduceSumSquare", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes,
"noop_with_empty_axes", 0))
return failure();
Value dataSquare = rewriter.create<Torch::AtenMulTensorOp>(
binder.getLoc(), data.getType(), data, data);
return reducedSumImpl(binder, rewriter, dataSquare,
resultType,
/*storeValue=*/data, keepDims,
noop_with_empty_axes, false);
});
patterns.onOp(
"ReduceMean", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
int64_t keepDims, noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
SmallVector<Value> axesList;
Value axesVal;
if (!binder.tensorOperandAtIndex(axesVal, 1)) {
auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
if (!inputType.hasSizes() || !resultType.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: expected input and result to have shapes");
}
// If the input shape and result shape is statically known then the
// list of dims to be squeezed can be derived from those shapes. As a
// result, we don't have to wait for the dim values to be known at
// runtime which is also expected by the downstream pipeline.
if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
SmallVector<int64_t> inputShape{inputType.getSizes()};
SmallVector<int64_t> resultShape{resultType.getSizes()};
if (llvm::equal(inputShape, resultShape)) {
// Case: none of the dimension is reduced.
rewriter.replaceOp(binder.op, data);
return success();
}
if (areAllElementsDistinct(inputShape)) {
// The check for the input shape elements to be distinct is added
// for the cases like:
// Input: [3, 2, 2] -> Output: [3, 2]
// For the above case, from the input and output shape it can't be
// inferred whether the dim:1 is reduced or dim:2. To avoid these
// type of cases, the check has been placed.
SmallVector<int64_t> reduceDims;
unsigned resultShapeCounter = 0;
for (unsigned i = 0; i < inputShape.size(); i++) {
if (resultShapeCounter < resultShape.size() &&
inputShape[i] == resultShape[resultShapeCounter]) {
resultShapeCounter++;
} else {
reduceDims.push_back(i);
if (resultShapeCounter < resultShape.size() &&
resultShape[resultShapeCounter] == 1)
resultShapeCounter++;
}
}
for (auto i : reduceDims) {
axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i)));
}
}
}
if (axesList.empty()) {
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axesVal.getType());
auto axesTy = dyn_cast<Torch::ValueTensorType>(axesVal.getType());
auto axesShape = axesTy.getSizes();
if (axesShape.size() != 1 || axesShape[0] == Torch::kUnknownSize)
return failure();
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(0));
SmallVector<int64_t> selectSizes{1};
auto selType = rewriter.getType<Torch::ValueTensorType>(
selectSizes, axesType.getOptionalDtype());
int64_t numAxes = axesShape[0];
for (int64_t i = 0; i < numAxes; ++i) {
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selType, axesVal, zero, iv);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
axesList.push_back(dim);
}
}
}
SmallVector<int64_t> axesInts;
if (!binder.s64IntegerArrayAttr(axesInts, "axes", {})) {
for (int64_t i = 0, s = axesInts.size(); i < s; ++i) {
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(axesInts[i]));
axesList.push_back(iv);
}
}
// deal with case when axes is empty
if (axesList.empty() && noop_with_empty_axes) {
rewriter.replaceOp(binder.op, data);
return success();
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
axesList);
Value keepDimBool =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
rewriter.replaceOpWithNewOp<Torch::AtenMeanDimOp>(
binder.op, resultType, data, dimValueList, keepDimBool,
/*dtype=*/noneVal);
return success();
});
patterns.onOp(
"ReduceMax", 13,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
// AtenAmaxOp allows us to pass a list of dims
Torch::ValueTensorType resultType;
Value data;
Value axes;
int64_t keepDims;
int64_t noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
auto dataTy = cast<Torch::BaseTensorType>(data.getType());
Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();
// If any of the input dims are 0 we set to the lower limit:
if (llvm::any_of(dataTy.getSizes(), [](int64_t d) { return d == 0; }) &&
(llvm::any_of(dataTy.getSizes(),
[](int64_t d) { return d == Torch::kUnknownSize; }) ||
keepDims)) {
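// There are no elements to reduce over, so materialize a tensor filled with
// the max-reduction identity (-inf for floats, the minimum value for ints)
// via aten.full at the statically known or runtime-queried result sizes.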
auto dty = dataTy.getDtype();
Value scalar;
if (FloatType fpTy = dyn_cast<FloatType>(dty)) {
auto inf =
APFloat::getInf(fpTy.getFloatSemantics(), /*Negative=*/true);
scalar = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(),
inf.convertToDouble()));
}
if (IntegerType intTy = dyn_cast<IntegerType>(dty)) {
auto minInt =
intTy.isSigned()
? APInt::getSignedMinValue(intTy.getIntOrFloatBitWidth())
: APInt::getMinValue(intTy.getIntOrFloatBitWidth());
scalar = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getIntegerAttr(rewriter.getIntegerType(64),
minInt.getSExtValue()));
}
llvm::SmallVector<Value> fillDims;
for (int i = 0, s = resultType.getSizes().size(); i < s; ++i) {
auto staticDim = resultType.getSizes()[i];
if (staticDim != Torch::kUnknownSize) {
fillDims.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getI64IntegerAttr(staticDim)));
continue;
}
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy, rewriter.getI64IntegerAttr(i));
fillDims.push_back(rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), torchIntTy, data, iv));
}
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value fillDimsList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(torchIntTy), fillDims);
rewriter.replaceOpWithNewOp<Torch::AtenFullOp>(
binder.op, resultType, fillDimsList, scalar, none, none, none,
none);
return success();
}
// Previous version of the operation had the axes as an attribute:
SmallVector<Value> axesList;
llvm::SmallVector<int64_t> axesAttr;
if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
for (int i = 0, s = axesAttr.size(); i < s; ++i) {
axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getI64IntegerAttr(axesAttr[i])));
}
}
// Extract the axes values from the axes operand:
if (!binder.tensorOperandAtIndex(axes, 1)) {
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axes.getType());
SmallVector<int64_t> selectSizes{1};
Type selectResultType = axesType.getWithSizesAndDtype(
selectSizes, axesType.getOptionalDtype());
auto sizes = axesType.getSizes();
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
// Extract the value of each axes:
for (int i = 0; i < sizes[0]; i++) {
// Go through the axes list and get each dim in the list
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, axes, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
axesList.push_back(dim);
}
}
// Handle the noop case:
if (axesList.empty() && noop_with_empty_axes) {
rewriter.replaceOp(binder.op, data);
return success();
}
// Deal with case when no axes arg is passed but not a noop:
if (axesList.empty()) {
int64_t numDims = dyn_cast<Torch::ValueTensorType>(data.getType())
.getSizes()
.size();
for (int i = 0; i < numDims; i++) {
Value curr = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
axesList.push_back(curr);
}
}
// Handle negative axis:
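// axis := axis + rank * (axis < 0), computed with torch ops so it also works
// when the axes come from a runtime tensor.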
Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
torchIntTy, data);
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(0));
for (Value &axes : axesList) {
Value isNegative =
rewriter.create<Torch::AtenLtIntOp>(binder.getLoc(), axes, zero);
isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
isNegative);
Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), isNegative, rankVal);
axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
finalOffset);
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(torchIntTy), axesList);
Value keepDimBool =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
rewriter.replaceOpWithNewOp<Torch::AtenAmaxOp>(
binder.op, resultType, data, dimValueList, keepDimBool);
return success();
});
patterns.onOp(
"ReduceMin", 13,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
// AtenAminOp allows us to pass a list of dims
Torch::ValueTensorType resultType;
Value data;
Value axes;
int64_t keepDims;
int64_t noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
auto dataTy = cast<Torch::BaseTensorType>(data.getType());
Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();
// If any of the input dims are 0 we set to the upper limit:
if (llvm::any_of(dataTy.getSizes(), [](int64_t d) { return d == 0; }) &&
(llvm::any_of(dataTy.getSizes(),
[](int64_t d) { return d == Torch::kUnknownSize; }) ||
keepDims)) {
auto dty = dataTy.getDtype();
Value scalar;
if (FloatType fpTy = dyn_cast<FloatType>(dty)) {
auto inf = APFloat::getInf(fpTy.getFloatSemantics());
scalar = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(),
inf.convertToDouble()));
}
if (IntegerType intTy = dyn_cast<IntegerType>(dty)) {
auto mx =
intTy.isSigned()
? APInt::getSignedMaxValue(intTy.getIntOrFloatBitWidth())
: APInt::getMaxValue(intTy.getIntOrFloatBitWidth());
scalar = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getIntegerAttr(rewriter.getIntegerType(64),
mx.getSExtValue()));
}
llvm::SmallVector<Value> fillDims;
for (int i = 0, s = resultType.getSizes().size(); i < s; ++i) {
auto staticDim = resultType.getSizes()[i];
if (staticDim != Torch::kUnknownSize) {
fillDims.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getI64IntegerAttr(staticDim)));
continue;
}
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy, rewriter.getI64IntegerAttr(i));
fillDims.push_back(rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), torchIntTy, data, iv));
}
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value fillDimsList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(torchIntTy), fillDims);
rewriter.replaceOpWithNewOp<Torch::AtenFullOp>(
binder.op, resultType, fillDimsList, scalar, none, none, none,
none);
return success();
}
// Previous version of the operation had the axes as an attribute:
SmallVector<Value> axesList;
llvm::SmallVector<int64_t> axesAttr;
if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
for (int i = 0, s = axesAttr.size(); i < s; ++i) {
axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getI64IntegerAttr(axesAttr[i])));
}
}
// Extract the axes values from the axes operand:
if (!binder.tensorOperandAtIndex(axes, 1)) {
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axes.getType());
SmallVector<int64_t> selectSizes{1};
Type selectResultType = axesType.getWithSizesAndDtype(
selectSizes, axesType.getOptionalDtype());
auto sizes = axesType.getSizes();
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
// Extract the value of each axes:
for (int i = 0; i < sizes[0]; i++) {
// Go through the axes list and get each dim in the list
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, axes, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
axesList.push_back(dim);
}
}
// Handle the noop case:
if (axesList.empty() && noop_with_empty_axes) {
rewriter.replaceOp(binder.op, data);
return success();
}
// Deal with case when no axes arg is passed but not a noop:
if (axesList.empty()) {
int64_t numDims = dyn_cast<Torch::ValueTensorType>(data.getType())
.getSizes()
.size();
for (int i = 0; i < numDims; i++) {
Value curr = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
axesList.push_back(curr);
}
}
// Handle negative axis:
Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
torchIntTy, data);
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(0));
for (Value &axes : axesList) {
Value isNegative =
rewriter.create<Torch::AtenLtIntOp>(binder.getLoc(), axes, zero);
isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
isNegative);
Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), isNegative, rankVal);
axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
finalOffset);
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), Torch::ListType::get(torchIntTy), axesList);
Value keepDimBool =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
rewriter.replaceOpWithNewOp<Torch::AtenAminOp>(
binder.op, resultType, data, dimValueList, keepDimBool);
return success();
});
patterns.onOp(
"Shape", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
int64_t start, end;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(start, "start", 0) ||
binder.s64IntegerAttr(end, "end", -1))
return failure();
auto inputType = dyn_cast<Torch::ValueTensorType>(operand.getType());
int64_t inputRank = inputType.getSizes().size();
auto shapeType = Torch::ValueTensorType::get(
binder.op->getContext(), SmallVector<int64_t>{inputRank},
resultType.getOptionalDtype());
Value shape = rewriter.create<Torch::Aten_ShapeAsTensorOp>(
binder.getLoc(), shapeType, operand);
if (inputRank == 0) {
rewriter.replaceOpWithNewOp<Torch::TensorStaticInfoCastOp>(
binder.op, resultType, shape);
return success();
}
if (start == 0 && end == -1) {
rewriter.replaceOp(binder.op, shape);
return success();
}
Value sv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(start));
Value ev = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(end));
Value step = rewriter.create<Torch::ConstantIntOp>(binder.getLoc(), 1);
Value dim = rewriter.create<Torch::ConstantIntOp>(binder.getLoc(), 0);
rewriter.replaceOpWithNewOp<Torch::AtenSliceTensorOp>(
binder.op, resultType, shape, dim, sv, ev, step);
return success();
});
patterns.onOp("Sinh", 9,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenSinhOp>(
binder.op, resultType, operand);
return success();
});
// split with fixed-size parts
// Arguments:
// - input: the tensor to split
// Attributes:
// - axis: the axis along which to split the input
// - num_outputs: the number of outputs to produce
// Outputs:
// - outputs: the produced outputs. Variadic with num_outputs elements.
  // Note: torch.aten gives a list of tensors, but ONNX gives a variadic list
  // of tensors, so we need to unpack the list.
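  // Rough sketch of the lowering below (illustrative only): the fixed split
  // size is computed by ceiling division,
  //   splitSize = (dimSize + numOutputs - 1) / numOutputs,
  // each output i is the input sliced along `axis` from i * splitSize to
  // (i + 1) * splitSize, and the final output takes whatever remains up to
  // dimSize.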
patterns.onOp(
"Split", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Value self;
int64_t axis;
int64_t numOutputs;
if (binder.tensorOperand(self))
return rewriter.notifyMatchFailure(
binder.op, "Not converting to AtenSplitTensorOp due to input "
"tensor mismatch");
if (binder.s64IntegerAttr(axis, "axis", 0))
return rewriter.notifyMatchFailure(binder.op,
"Failed to get axis attribute");
numOutputs = binder.op->getNumResults();
if (binder.s64IntegerAttr(numOutputs, "num_outputs", numOutputs))
return rewriter.notifyMatchFailure(
binder.op, "Failed to get num_outputs attribute");
auto loc = binder.getLoc();
auto result0Ty =
cast<Torch::ValueTensorType>(binder.op->getResult(0).getType());
auto resultNTy = cast<Torch::ValueTensorType>(
binder.op->getResults().back().getType());
auto selfTy = cast<Torch::ValueTensorType>(self.getType());
int64_t dim = axis;
if (dim < 0)
dim += selfTy.getSizes().size();
Value dimValue = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(dim));
Value vNumOutputs = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(numOutputs));
Value one = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(1));
Value zero = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(0));
Value vDimSize = rewriter.create<Torch::AtenSizeIntOp>(
loc, rewriter.getType<Torch::IntType>(), self, dimValue);
Value addNumOutputs =
rewriter.create<Torch::AtenAddIntOp>(loc, vDimSize, vNumOutputs);
Value subOne =
rewriter.create<Torch::AtenSubIntOp>(loc, addNumOutputs, one);
Value splitSize =
rewriter.create<Torch::AtenFloordivIntOp>(loc, subOne, vNumOutputs);
llvm::SmallVector<Value> outputs;
Value step = one;
Value start = zero;
for (int i = 0; i < numOutputs - 1; ++i) {
Value end =
rewriter.create<Torch::AtenAddIntOp>(loc, start, splitSize);
Value slice = rewriter.create<Torch::AtenSliceTensorOp>(
loc, result0Ty, self, dimValue, start, end, step);
start = end;
outputs.push_back(slice);
}
Value end = vDimSize;
Value lastSlice = rewriter.create<Torch::AtenSliceTensorOp>(
loc, resultNTy, self, dimValue, start, end, step);
outputs.push_back(lastSlice);
rewriter.replaceOp(binder.op, outputs);
return success();
});
// split with variable parts
// Arguments:
// - input: the tensor to split
// - split: the sizes of the splits to be produced
// Attributes:
// - axis: the axis along which to split the input
// - num_outputs: the number of outputs to produce
// Outputs:
// - outputs: the produced outputs. Variadic with num_outputs elements.
  // Note: torch.aten gives a list of tensors, but ONNX gives a variadic list
  // of tensors, so we need to unpack the list.
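  // For example (illustrative only): splitting a tensor of size 6 along the
  // split axis with split = [2, 4] yields two outputs of sizes 2 and 4 along
  // that axis. The lowering below feeds the split sizes to
  // torch.aten.split.with_sizes and then unpacks the resulting list.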
patterns.onOp(
"Split", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Value self;
Value split;
int64_t axis;
int64_t num_outputs;
if (binder.tensorOperandAtIndex(self, 0) ||
binder.tensorOperandAtIndex(split, 1))
return rewriter.notifyMatchFailure(
binder.op, "Not converting to AtenSplitWithSizesOp due to input "
"tensor mismatch");
if (binder.s64IntegerAttr(axis, "axis", 0))
return rewriter.notifyMatchFailure(binder.op,
"Failed to get axis attribute");
if (binder.s64IntegerAttr(num_outputs, "num_outputs", 0))
return rewriter.notifyMatchFailure(
binder.op, "Failed to get num_outputs attribute");
auto result0Ty =
cast<Torch::ValueTensorType>(binder.op->getResult(0).getType());
auto selfTy =
cast<Torch::ValueTensorType>(binder.op->getOperand(0).getType());
int64_t dim = axis;
if (dim < 0)
dim += selfTy.getSizes().size();
llvm::SmallVector<int64_t> intermediateShape(result0Ty.getSizes());
for (auto result : binder.op->getResultTypes()) {
int64_t d = cast<Torch::ValueTensorType>(result).getSizes()[dim];
intermediateShape[dim] = d == intermediateShape[dim] ? d : -1;
}
Torch::PrimTolistOp splitToList = rewriter.create<Torch::PrimTolistOp>(
binder.getLoc(),
Torch::ListType::get(rewriter.getType<Torch::IntType>()), split);
Value dimValue = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), dim));
        // TODO: Attempt to use the shape expected by ONNX MLIR as ground
        // truth. For now just use dynamic shapes.
auto resultOuterType =
Torch::ListType::get(rewriter.getType<Torch::ValueTensorType>(
/*std::optional<llvm::ArrayRef<int64_t>>=*/intermediateShape,
result0Ty.getOptionalDtype()));
Torch::AtenSplitWithSizesOp new_op =
rewriter.create<Torch::AtenSplitWithSizesOp>(
binder.getLoc(), resultOuterType, self,
splitToList.getResult(0), dimValue);
// the onnx op is variadic with multiple results, but AtenSplitWithSizes
// outputs a list so we need to unpack the list
rewriter.replaceOpWithNewOp<Torch::PrimListUnpackOp>(
binder.op, binder.op->getResults().getType(), new_op.getResult());
return success();
});
patterns.onOp("Tan", 7,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenTanOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp(
"Transpose", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
auto loc = binder.getLoc();
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
auto operandType = cast<Torch::ValueTensorType>(operand.getType());
TensorType tensorType = operandType.toBuiltinTensor();
if (!tensorType || !tensorType.hasRank())
return failure();
// Default permutation is to reverse orders:
int64_t rank = tensorType.getRank();
llvm::SmallVector<int64_t> reverse(rank);
for (int64_t i = 0; i < rank; ++i) {
reverse[i] = rank - i - 1;
}
llvm::SmallVector<int64_t> permutations;
if (failed(binder.s64IntegerArrayAttr(permutations, "perm", reverse)))
return rewriter.notifyMatchFailure(binder.op,
"Failed to obtain permutations");
if (static_cast<int64_t>(permutations.size()) != rank)
return rewriter.notifyMatchFailure(
binder.op, "Permutation length does not match operand rank");
llvm::SmallVector<int64_t> shape(tensorType.getShape());
llvm::SmallVector<int64_t> current(rank);
for (int64_t i = 0; i < rank; ++i) {
current[i] = i;
}
for (auto &dim : permutations)
dim = dim < 0 ? dim + rank : dim;
// We need to override to the destination if known:
if (resultType.hasSizes()) {
for (int i = 0; i < rank; ++i) {
shape[permutations[i]] = resultType.getSizes()[i];
}
}
// Convert dynamic shape dimension:
for (unsigned i = 0; i < shape.size(); i++) {
if (shape[i] == ShapedType::kDynamic)
shape[i] = Torch::kUnknownSize;
}
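        // Decompose the permutation into a sequence of pairwise transposes
        // (selection-sort style): for each position i, find where the desired
        // source dimension currently lives, swap it into place, and update the
        // tracked shape accordingly.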
for (int64_t i = 0; i < rank; ++i) {
if (current[i] == permutations[i])
continue;
int64_t target = i + 1;
for (; target < rank; ++target) {
if (current[target] == permutations[i])
break;
}
std::swap(shape[i], shape[target]);
std::swap(current[i], current[target]);
Value dim0 = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value dim1 = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), target));
operand = rewriter.create<Torch::AtenTransposeIntOp>(
loc,
Torch::ValueTensorType::get(tensorType.getContext(), shape,
operandType.getOptionalDtype()),
operand, dim0, dim1);
}
rewriter.replaceOp(binder.op, operand);
return success();
});
patterns.onOp(
"Slice", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultTorchType;
Value operand, starts, ends;
// Handle if axes are not provided
if (binder.tensorOperandAtIndex(operand, 0) ||
binder.tensorOperandAtIndex(starts, 1) ||
binder.tensorOperandAtIndex(ends, 2) ||
binder.tensorResultType(resultTorchType)) {
return failure();
}
auto context = rewriter.getContext();
auto operandTorchTy = cast<Torch::ValueTensorType>(operand.getType());
auto operandTy =
dyn_cast<RankedTensorType>(operandTorchTy.toBuiltinTensor());
if (!operandTy)
return rewriter.notifyMatchFailure(
binder.op,
"Expected tensor operator argument to be a ranked tensor type");
auto startsTorchTy = cast<Torch::ValueTensorType>(starts.getType());
auto startsTy =
dyn_cast<RankedTensorType>(startsTorchTy.toBuiltinTensor());
int startSize = startsTy.getDimSize(0);
auto endsTorchTy = cast<Torch::ValueTensorType>(ends.getType());
auto endsTy = dyn_cast<RankedTensorType>(endsTorchTy.toBuiltinTensor());
int endSize = endsTy.getDimSize(0);
auto resultTy =
dyn_cast<RankedTensorType>(resultTorchType.toBuiltinTensor());
if (!resultTy)
return rewriter.notifyMatchFailure(
binder.op, "Expected result type to be a ranked tensor type");
Location loc = binder.getLoc();
// Binding `axes` from its arguments or through a default value
Value axes;
if (binder.getNumOperands() >= 4) {
if (binder.tensorOperandAtIndex(axes, 3)) {
return failure();
}
}
// Binding `steps` from its arguments or through a default value
Value steps;
if (binder.getNumOperands() >= 5) {
if (binder.tensorOperandAtIndex(steps, 4)) {
return failure();
}
} else {
// The default `steps` value is a 1d tensor filled with ones with a
// size equal to the size of `starts` and `ends`.
Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
Value sizeStepInput = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), startSize));
Value sizeStepsInput = rewriter.create<Torch::PrimListConstructOp>(
loc,
Torch::ListType::get(
Torch::IntType::get(binder.op->getContext())),
sizeStepInput);
steps = rewriter.create<Torch::AtenOnesOp>(
loc, startsTorchTy, sizeStepsInput, none, none, none, none);
}
if (!(endsTy.getRank() == 1 && startsTy.getRank() == 1 &&
startSize == endSize))
return rewriter.notifyMatchFailure(
binder.op, "Expected the rank of starts and ends tensors to be 1 "
"and their dimensions to match");
if (axes) {
auto axesTorchTy = cast<Torch::ValueTensorType>(axes.getType());
auto axesTy =
dyn_cast<RankedTensorType>(axesTorchTy.toBuiltinTensor());
int64_t numAxes = axesTy.getDimSize(0);
if (!(axesTy && numAxes == endSize))
return rewriter.notifyMatchFailure(
binder.op, "Axes should be the same size of starts and ends");
}
auto stepsTy = dyn_cast<RankedTensorType>(
cast<Torch::ValueTensorType>(steps.getType()).toBuiltinTensor());
if (!(stepsTy && stepsTy.getDimSize(0) == endsTy.getDimSize(0)))
return rewriter.notifyMatchFailure(
binder.op, "Steps should be the same size of starts and ends");
Value zero = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
auto select = [&](Value v, Value k) -> Value {
auto ty = cast<Torch::ValueTensorType>(v.getType());
auto sel = rewriter.create<Torch::AtenIndexSelectOp>(
loc,
Torch::ValueTensorType::get(ty.getContext(), ArrayRef<int64_t>{1},
ty.getOptionalDtype()),
v, zero, k);
Value item = rewriter.create<Torch::AtenItemOp>(
loc, rewriter.getType<Torch::IntType>(), sel);
return item;
};
llvm::SmallVector<int64_t> intermediateShape(operandTy.getShape());
for (int i = 0, s = operandTy.getRank(); i < s; ++i) {
if (operandTy.getDimSize(i) != resultTy.getDimSize(i))
intermediateShape[i] = -1;
if (intermediateShape[i] == ShapedType::kDynamic)
intermediateShape[i] = Torch::kUnknownSize;
}
auto intermediateType = Torch::ValueTensorType::get(
context, intermediateShape, resultTorchType.getOptionalDtype());
for (int i = 0; i < endSize; ++i) {
Value k = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value kTensor = rewriter.create<Torch::PrimNumToTensorScalarOp>(
loc,
Torch::ValueTensorType::get(
context, ArrayRef<int64_t>{1},
rewriter.getIntegerType(64, /*signed*/ 1)),
k);
Value start = select(starts, kTensor);
Value end = select(ends, kTensor);
Value axis = axes ? select(axes, kTensor) : k;
Value step = select(steps, kTensor);
auto sliceType = intermediateType;
sliceType = i == (endSize - 1) ? resultTorchType : sliceType;
operand = rewriter.create<Torch::AtenSliceTensorOp>(
loc, sliceType, operand, axis, start, end, step);
}
rewriter.replaceOp(binder.op, operand);
return success();
});
patterns.onOp(
"Reshape", 5, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data;
Value shape;
int64_t allowzero;
if (binder.tensorOperands(data, shape) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(allowzero, "allowzero", 0))
return failure();
// If the result shape is static then we can create a result shape list
// directly using the result shape values (integers).
if (resultType.hasSizes()) {
bool hasStaticShape = resultType.areAllSizesKnown();
ArrayRef<int64_t> resultShapeInt = resultType.getSizes();
if (hasStaticShape) {
SmallVector<Value> resultShape;
for (int64_t dim : resultShapeInt) {
resultShape.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dim)));
}
Value resultShapeList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(
Torch::IntType::get(binder.op->getContext())),
resultShape);
rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(
binder.op, resultType, data, resultShapeList);
return success();
}
}
Torch::BaseTensorType shapeType =
cast<Torch::BaseTensorType>(shape.getType());
SmallVector<Value> dimList;
SmallVector<int64_t> selectSizes;
selectSizes.push_back(1);
Type selectResultType = shapeType.getWithSizesAndDtype(
llvm::ArrayRef(selectSizes), shapeType.getOptionalDtype());
auto shapeSizes =
dyn_cast<Torch::ValueTensorType>(shape.getType()).getSizes();
auto dataSizes =
dyn_cast<Torch::ValueTensorType>(data.getType()).getSizes();
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
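        // ONNX semantics: with allowzero == 0 (the default), a zero entry in
        // the shape tensor means "keep the corresponding input dimension";
        // with allowzero == 1, a zero denotes an actual dimension of size 0.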
if (allowzero == 0) {
// convert shape (tensor) into torch int list while dealing with zero
// vals
for (int i = 0; i < shapeSizes[0]; i++) {
// Go through the shape list and get each dim in the list
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, shape, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
            // deal with zero dim values: replace them with the original dim
            // value from the input
Value isZero =
rewriter.create<Torch::AtenEqIntOp>(binder.getLoc(), dim, zero);
isZero =
rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(), isZero);
int64_t dataRank = dataSizes.size();
if (i < dataRank) {
auto torchIntTy = rewriter.getType<Torch::IntType>();
auto int64Ty = rewriter.getIntegerType(64, true);
auto dimTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>(), int64Ty);
auto boolTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>(), rewriter.getI1Type());
Value iv = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i));
Value inDim = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), torchIntTy, data, iv);
isZero = rewriter.create<Torch::PrimNumToTensorScalarOp>(
binder.getLoc(), boolTy, isZero);
inDim = rewriter.create<Torch::PrimNumToTensorScalarOp>(
binder.getLoc(), dimTy, inDim);
dim = rewriter.create<Torch::PrimNumToTensorScalarOp>(
binder.getLoc(), dimTy, dim);
Value finalDim = rewriter.create<Torch::AtenWhereSelfOp>(
binder.getLoc(), dimTy, isZero, inDim, dim);
dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
finalDim);
}
dimList.push_back(dim);
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(
Torch::IntType::get(binder.op->getContext())),
dimList);
rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(
binder.op, resultType, data, dimValueList);
return success();
}
        // convert the shape tensor into a torch int list
for (int i = 0; i < shapeSizes[0]; i++) {
          // Go through the shape list and get each dim in the list
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, shape, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
dimList.push_back(dim);
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
dimList);
rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(binder.op, resultType,
data, dimValueList);
return success();
});
patterns.onOp(
"ReduceProd", 13,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // ReduceProd allows us to pass a list of dims but AtenProdDimInt only
        // allows one dim as input.
Torch::ValueTensorType resultType;
Value data;
Value axes;
int64_t keepDims;
int64_t noop_with_empty_axes;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorResultType(resultType) ||
binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
0))
return failure();
auto dataTy = cast<Torch::BaseTensorType>(data.getType());
Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();
if (!resultType.hasSizes() || !resultType.areAllSizesKnown() ||
!dataTy.areAllSizesKnown())
return rewriter.notifyMatchFailure(
binder.op,
"Expected the input and result type to have known sizes");
int64_t rank = dataTy.getSizes().size();
SmallVector<Value> axesList;
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
// Previous version of the operation had the axes as an attribute:
llvm::SmallVector<int64_t> axesAttr;
if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
for (int i = 0, s = axesAttr.size(); i < s; ++i) {
axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), torchIntTy,
rewriter.getI64IntegerAttr(axesAttr[i])));
}
}
        // Handle cases where axes are explicitly specified.
// Extract the axes values from the axes operand.
// This really shouldn't happen but it helps pass weird tests.
// TODO: Derive the chosen axes from the data type and final result type
// instead of using the dynamic axes at operand[1].
if (!binder.tensorOperandAtIndex(axes, 1)) {
Torch::BaseTensorType axesType =
cast<Torch::BaseTensorType>(axes.getType());
auto sizes = axesType.getSizes();
for (int i = 0; i < sizes[0]; i++) {
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(),
axesType.getWithSizesAndDtype(llvm::SmallVector<int64_t>{1},
axesType.getOptionalDtype()),
axes, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(binder.getLoc(),
torchIntTy, extract);
axesList.push_back(dim);
}
}
        // Handle the noop case:
        // When axes is empty and noop_with_empty_axes is set to true, the
        // input tensor will not be reduced, and the output tensor is
        // equivalent to the input tensor.
if (axesList.empty() && noop_with_empty_axes) {
rewriter.replaceOp(binder.op, data);
return success();
}
        // Handle the case where no axes arg is passed but it is not a noop:
        // manually set the axes to all dims (as positive indices).
if (axesList.empty()) {
for (int i = 0; i < rank; i++) {
Value dimValue = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i));
axesList.push_back(dimValue);
}
}
// Handle negative axis:
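        // As above: axis = axis + (axis < 0 ? 1 : 0) * rank, mapping negative
        // axes into the [0, rank) range without branching on dynamic values.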
Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
torchIntTy, data);
for (Value &axes : axesList) {
Value isNegative =
rewriter.create<Torch::AtenLtIntOp>(binder.getLoc(), axes, zero);
isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
isNegative);
Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), isNegative, rankVal);
axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
finalOffset);
}
// Handle multiple axes case:
// ReduceProd on each dim, always set keepDimsBool == True to avoid
// segfault.
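        // For example (illustrative only): reducing a [2, 3, 4] tensor over
        // axes [0, 2] performs two keepdim reductions to reach shape
        // [1, 3, 1]; with keepdims == 0 the result is then reshaped to the
        // final [3] shape.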
Value trueVal =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
SmallVector<int64_t> intermediateShape(rank, Torch::kUnknownSize);
Value dataReduceProd = data;
for (int i = 0, numAxes = axesList.size(); i < numAxes; i++) {
auto axis = axesList[i];
if (keepDims && i == numAxes - 1) {
dataReduceProd = rewriter.create<Torch::AtenProdDimIntOp>(
binder.getLoc(),
dataTy.getWithSizesAndDtype(resultType.getSizes(),
dataTy.getOptionalDtype()),
dataReduceProd, axis, trueVal, noneVal);
rewriter.replaceOp(binder.op, dataReduceProd);
return success();
}
Type resultTyReduceProd = dataTy.getWithSizesAndDtype(
ArrayRef(intermediateShape), dataTy.getOptionalDtype());
dataReduceProd = rewriter.create<Torch::AtenProdDimIntOp>(
binder.getLoc(), resultTyReduceProd, dataReduceProd, axis,
trueVal, noneVal);
}
        // Derive the final shape of the tensor after the prod loop over each
        // axis.
SmallVector<int64_t> dataReduceProdSize;
auto dataSize = dataTy.getSizes();
auto resultTypeSizes = resultType.getSizes();
if (!keepDims) {
          // Handle the keepDims == false case:
          // Two-pointer walk to derive the static shape after the prod loop.
int j = 0;
for (int i = 0; i < rank; i++) {
if (resultTypeSizes.size() && dataSize[i] == resultTypeSizes[j]) {
dataReduceProdSize.push_back(resultTypeSizes[i]);
j++;
continue;
}
dataReduceProdSize.push_back(1);
}
}
        // Still in the keepDims == false case (keepDims == true returned
        // inside the loop above): reshape the prod loop result to the final
        // result shape.
SmallVector<Value> dataReduceProdShape;
for (auto dim : dataReduceProdSize)
dataReduceProdShape.push_back(rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dim)));
Value dataReduceProdShapeList =
rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>()),
dataReduceProdShape);
rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(
binder.op, resultType, dataReduceProd, dataReduceProdShapeList);
return success();
});
patterns.onOp(
"Range", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
// ONNX.Range(start, limit, delta) -- limit is exclusive
Torch::ValueTensorType resultType;
Value start, limit, delta;
auto loc = binder.getLoc();
Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
if (binder.tensorOperandAtIndex(start, 0) ||
binder.tensorOperandAtIndex(limit, 1) ||
binder.tensorOperandAtIndex(delta, 2) ||
binder.tensorResultType(resultType))
return failure();
        // Convert the 0-dimensional/scalar tensors ([]) to scalar Torch
        // numeric values; e.g. torch.tensor(1.1) in ONNX becomes the scalar
        // 1.1 here. The type of start, limit, and delta can be one of: double,
        // float, int16, int32, int64. We assume start, limit, and delta all
        // have the same type (could they be different?).
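        // For example, Range(start=1, limit=7, delta=2) produces [1, 3, 5];
        // the limit value itself is never included.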
Torch::BaseTensorType startTensorType =
cast<Torch::BaseTensorType>(start.getType());
bool isFloatDType = startTensorType.getDtype().isF64() ||
startTensorType.getDtype().isF32();
bool isIntDType = startTensorType.getDtype().isInteger(16) ||
startTensorType.getDtype().isInteger(32) ||
startTensorType.getDtype().isInteger(64);
if (!isFloatDType && !isIntDType) {
return rewriter.notifyMatchFailure(
binder.op, "Expected the start, limit, delta to be one of "
"double, float, int16, int32, int64");
}
Value scalarStart, scalarLimit, scalarDelta;
if (isFloatDType) {
scalarStart = getItemOp<Torch::FloatType>(binder, rewriter, start);
scalarLimit = getItemOp<Torch::FloatType>(binder, rewriter, limit);
scalarDelta = getItemOp<Torch::FloatType>(binder, rewriter, delta);
} else {
scalarStart = getItemOp<Torch::IntType>(binder, rewriter, start);
scalarLimit = getItemOp<Torch::IntType>(binder, rewriter, limit);
scalarDelta = getItemOp<Torch::IntType>(binder, rewriter, delta);
}
rewriter.replaceOpWithNewOp<Torch::AtenArangeStartStepOp>(
binder.op, resultType, scalarStart, scalarLimit, scalarDelta, none,
none, none, none);
return success();
});
patterns.onOp(
"Size", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
auto loc = binder.getLoc();
auto &op = binder.op;
auto operandTy = cast<Torch::BaseTensorType>(operand.getType());
if (!operandTy.hasSizes())
return rewriter.notifyMatchFailure(op, "input rank unknown");
llvm::SmallVector<Value> dims;
int64_t rank = operandTy.getSizes().size();
for (int i = 0; i < rank; ++i) {
auto iv = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(i));
Value dim = rewriter.create<Torch::AtenSizeIntOp>(
loc, rewriter.getType<Torch::IntType>(), operand, iv);
dims.push_back(dim);
}
Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(loc, false);
Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
if (dims.empty()) {
Value one = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(1));
rewriter.replaceOpWithNewOp<Torch::AtenTensorIntOp>(
op, resultType, one, none, none, cstFalse);
return success();
}
Value prod = dims[0];
for (int i = 1, s = dims.size(); i < s; ++i)
prod = rewriter.create<Torch::AtenMulIntOp>(loc, prod, dims[i]);
rewriter.replaceOpWithNewOp<Torch::AtenTensorIntOp>(
op, resultType, prod, none, none, cstFalse);
return success();
});
patterns.onOp(
"Tile", 6, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
Value repeatDims;
if (binder.tensorOperands(operand, repeatDims) ||
binder.tensorResultType(resultType))
return failure();
// convert repeatDims tensor to list of ints
auto repeatDimsSizes =
dyn_cast<Torch::ValueTensorType>(repeatDims.getType()).getSizes();
SmallVector<Value> dimList;
SmallVector<int64_t> selectSizes;
selectSizes.push_back(1);
Torch::BaseTensorType shapeType =
cast<Torch::BaseTensorType>(repeatDims.getType());
Type selectResultType = shapeType.getWithSizesAndDtype(
llvm::ArrayRef(selectSizes), shapeType.getOptionalDtype());
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
for (int i = 0; i < repeatDimsSizes[0]; i++) {
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, repeatDims, zero, selectIndex);
Value dim = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
dimList.push_back(dim);
}
Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
dimList);
rewriter.replaceOpWithNewOp<Torch::AtenTileOp>(binder.op, resultType,
operand, dimValueList);
return success();
});
patterns.onOp(
"TopK", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType Values_type, Indices_type;
Value input, kValue;
int64_t axis;
bool largest, sorted;
if (binder.tensorOperandAtIndex(input, 0) ||
binder.tensorOperandAtIndex(kValue, 1) ||
binder.s64IntegerAttr(axis, "axis", -1) ||
binder.s64BoolAttr(largest, "largest", true) ||
binder.s64BoolAttr(sorted, "sorted", true) ||
binder.tensorResultTypeAtIndex(Values_type, 0) ||
binder.tensorResultTypeAtIndex(Indices_type, 1))
return failure();
std::optional<unsigned> maybeRank = Torch::getTensorRank(input);
if (!maybeRank)
return rewriter.notifyMatchFailure(binder.op,
"Unimplemented: unranked tensor");
unsigned rank = *maybeRank;
axis = Torch::toPositiveDim(axis, rank);
Value cstAxis = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(axis));
Value cstLargest =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), largest);
Value cstSorted =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), sorted);
Value kValueInt = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), kValue);
rewriter.replaceOpWithNewOp<Torch::AtenTopkOp>(
binder.op, Values_type, Indices_type, input, kValueInt, cstAxis,
cstLargest, cstSorted);
return success();
});
patterns.onOp("Sign", 9,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value operand;
if (binder.tensorOperand(operand) ||
binder.tensorResultType(resultType))
return failure();
rewriter.replaceOpWithNewOp<Torch::AtenSignOp>(
binder.op, resultType, operand);
return success();
});
patterns.onOp(
"Softplus", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
if (binder.tensorOperand(input) ||
binder.tensorResultType(resultType)) {
return failure();
}
// out = ln(exp(x) + 1)
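        // Lowered as log1p(exp(x)); aten.log1p(y) computes ln(1 + y), so this
        // matches the formula above.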
Value exp = rewriter.create<Torch::AtenExpOp>(binder.getLoc(),
resultType, input);
rewriter.replaceOpWithNewOp<Torch::AtenLog1pOp>(binder.op, resultType,
exp);
return success();
});
patterns.onOp("Softsign", 22,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
if (binder.tensorOperand(input) ||
binder.tensorResultType(resultType)) {
return failure();
}
Value absX = rewriter.create<Torch::AtenAbsOp>(
binder.getLoc(), resultType, input);
Value constOne = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
Value absXPlusOne = rewriter.create<Torch::AtenAddScalarOp>(
binder.getLoc(), resultType, absX, constOne, constOne);
rewriter.replaceOpWithNewOp<Torch::AtenDivTensorOp>(
binder.op, resultType, input, absXPlusOne);
return success();
});
patterns.onOp(
"Trilu", 14, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
int64_t upper;
if (binder.tensorOperandAtIndex(input, 0) ||
binder.s64IntegerAttr(upper, "upper", 1) ||
binder.tensorResultType(resultType)) {
return failure();
}
Value diagonal;
if (binder.tensorOperandAtIndex(diagonal, 1)) {
diagonal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
} else {
diagonal = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), diagonal);
}
if (upper) {
rewriter.replaceOpWithNewOp<Torch::AtenTriuOp>(binder.op, resultType,
input, diagonal);
return success();
}
rewriter.replaceOpWithNewOp<Torch::AtenTrilOp>(binder.op, resultType,
input, diagonal);
return success();
});
patterns.onOp("ThresholdedRelu", 10,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
float alpha;
if (binder.tensorOperand(input) ||
binder.f32FloatAttr(alpha, "alpha", 1.0) ||
binder.tensorResultType(resultType)) {
return failure();
}
Value cstAlpha = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), alpha));
Value value = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), 0.0));
rewriter.replaceOpWithNewOp<Torch::AtenThresholdOp>(
binder.op, resultType, input, cstAlpha, value);
return success();
});
patterns.onOp(
"RandomNormal", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
SmallString<64> name("torch.onnx.seed");
auto seedAttr = binder.op->getAttr(name);
if (seedAttr)
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for seed attribute");
Torch::ValueTensorType resultType;
int64_t dtypeIntOnnx;
float mean, scale;
SmallVector<int64_t> shape;
if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) ||
binder.f32FloatAttr(mean, "mean", 0.0) ||
binder.f32FloatAttr(scale, "scale", 1.0) ||
binder.s64IntegerArrayAttr(shape, "shape", {}) ||
binder.tensorResultType(resultType)) {
return failure();
}
std::optional<int64_t> dtypeIntTorch =
onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx);
if (!dtypeIntTorch.has_value()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented support for the given dtype conversion");
}
Value constDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value()));
Value shapeList = createConstantIntList(binder, rewriter, shape);
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value self = rewriter.create<Torch::AtenEmptyMemoryFormatOp>(
binder.op->getLoc(), resultType, shapeList,
/*dtype=*/constDtype,
/*layout=*/cstNone,
/*device=*/cstNone, /*pinMemory=*/cstNone,
/*memoryFormat=*/cstNone);
Value cstMean = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), mean));
Value cstStd = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), scale));
rewriter.replaceOpWithNewOp<Torch::AtenNormalFunctionalOp>(
binder.op, resultType, self, cstMean, cstStd,
/*generator=*/cstNone);
return success();
});
patterns.onOp(
"RandomNormalLike", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
SmallString<64> name("torch.onnx.seed");
auto seedAttr = binder.op->getAttr(name);
if (seedAttr)
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for seed attribute");
Torch::ValueTensorType resultType;
int64_t dtypeIntOnnx;
float mean, scale;
SmallVector<int64_t> shape;
Value input;
if (binder.tensorOperand(input) ||
binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) ||
binder.f32FloatAttr(mean, "mean", 0.0) ||
binder.f32FloatAttr(scale, "scale", 1.0) ||
binder.tensorResultType(resultType)) {
return failure();
}
std::optional<int64_t> dtypeIntTorch =
onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx);
if (!dtypeIntTorch.has_value()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented support for the given dtype conversion");
}
Value constDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value()));
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
input = rewriter.create<Torch::AtenToDtypeOp>(
binder.op->getLoc(), resultType, input, constDtype,
/*non_blocking=*/cstFalse, /*copy=*/cstFalse,
/*memory_format=*/cstNone);
Value cstMean = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), mean));
Value cstStd = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), scale));
rewriter.replaceOpWithNewOp<Torch::AtenNormalFunctionalOp>(
binder.op, resultType, input, cstMean, cstStd,
/*generator=*/cstNone);
return success();
});
patterns.onOp(
"RandomUniform", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
SmallString<64> name("torch.onnx.seed");
auto seedAttr = binder.op->getAttr(name);
if (seedAttr)
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for seed attribute");
Torch::ValueTensorType resultType;
int64_t dtypeIntOnnx;
float high, low;
SmallVector<int64_t> shape;
if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) ||
binder.f32FloatAttr(high, "high", 1.0) ||
binder.f32FloatAttr(low, "low", 0.0) ||
binder.s64IntegerArrayAttr(shape, "shape", {}) ||
binder.tensorResultType(resultType)) {
return failure();
}
std::optional<int64_t> dtypeIntTorch =
onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx);
if (!dtypeIntTorch.has_value()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented support for the given dtype conversion");
}
Value constDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value()));
Value shapeList = createConstantIntList(binder, rewriter, shape);
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value self = rewriter.create<Torch::AtenEmptyMemoryFormatOp>(
binder.op->getLoc(), resultType, shapeList,
/*dtype=*/constDtype,
/*layout=*/cstNone,
/*device=*/cstNone, /*pinMemory=*/cstNone,
/*memoryFormat=*/cstNone);
Value cstHigh = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), high));
Value cstLow = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), low));
rewriter.replaceOpWithNewOp<Torch::AtenUniformOp>(
binder.op, resultType, self, cstLow, cstHigh,
/*generator=*/cstNone);
return success();
});
patterns.onOp(
"RandomUniformLike", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
SmallString<64> name("torch.onnx.seed");
auto seedAttr = binder.op->getAttr(name);
if (seedAttr)
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for seed attribute");
Torch::ValueTensorType resultType;
int64_t dtypeIntOnnx;
float high, low;
SmallVector<int64_t> shape;
Value input;
if (binder.tensorOperand(input) ||
binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) ||
binder.f32FloatAttr(high, "high", 1.0) ||
binder.f32FloatAttr(low, "low", 0.0) ||
binder.tensorResultType(resultType)) {
return failure();
}
std::optional<int64_t> dtypeIntTorch =
onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx);
if (!dtypeIntTorch.has_value()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented support for the given dtype conversion");
}
Value constDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value()));
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
input = rewriter.create<Torch::AtenToDtypeOp>(
binder.op->getLoc(), resultType, input, constDtype,
/*non_blocking=*/cstFalse, /*copy=*/cstFalse,
/*memory_format=*/cstNone);
Value cstHigh = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), high));
Value cstLow = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), low));
rewriter.replaceOpWithNewOp<Torch::AtenUniformOp>(
binder.op, resultType, input, cstLow, cstHigh,
/*generator=*/cstNone);
return success();
});
patterns.onOp(
"SoftmaxCrossEntropyLoss", 12,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
int64_t ignoreIndex;
std::string reduction;
SmallVector<int64_t> shape;
Value scores, labels, weight;
if (binder.tensorOperandAtIndex(scores, 0) ||
binder.tensorOperandAtIndex(labels, 1) ||
binder.s64IntegerAttr(ignoreIndex, "ignore_index", -100) ||
binder.customOpNameStringAttr(reduction, "reduction", "mean") ||
binder.tensorResultTypeAtIndex(resultType, 0)) {
return failure();
}
if (binder.tensorOperandAtIndex(weight, 2))
weight = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstIgnoreIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(ignoreIndex));
int64_t reductionInt = reduction == "none" ? 0
: reduction == "mean" ? 1
: 2;
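        // 0 = none, 1 = mean, 2 = sum, matching the reduction encoding
        // expected by torch's cross_entropy_loss.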
Value cstReductionInt = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(reductionInt));
// The default PyTorch value for label smoothing is "0.0".
// Refer:
// https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
Value cstLabelSmoothing = rewriter.create<Torch::ConstantFloatOp>(
binder.getLoc(), rewriter.getType<Torch::FloatType>(),
rewriter.getFloatAttr(rewriter.getF64Type(), 0.0));
Value loss = rewriter.create<Torch::AtenCrossEntropyLossOp>(
binder.getLoc(), resultType, scores, labels, weight,
cstReductionInt, cstIgnoreIndex, cstLabelSmoothing);
if (binder.op->getNumResults() == 1) {
rewriter.replaceOp(binder.op, loss);
return success();
}
Torch::ValueTensorType resultTypeLogProb;
if (binder.tensorResultTypeAtIndex(resultTypeLogProb, 1))
return failure();
Value dim = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value logProb = rewriter.create<Torch::AtenLogSoftmaxIntOp>(
binder.getLoc(), resultTypeLogProb, scores, dim, /*dtype=*/cstNone);
rewriter.replaceOp(binder.op, {loss, logProb});
return success();
});
patterns.onOp(
"Resize", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
llvm::SmallVector<Value> operands;
std::string mode, nearest_mode, coordTfMode;
int64_t antialias, exclude_outside;
float extrapolation_value;
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
if (auto attr = binder.op->getAttr("torch.onnx.axes")) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for axes attribute");
}
if (auto attr =
binder.op->getAttr("torch.onnx.keep_aspect_ratio_policy")) {
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: support not present for "
"keep_aspect_ratio_policy attribute");
}
if (binder.tensorOperandsList(operands) ||
binder.tensorResultType(resultType) ||
binder.customOpNameStringAttr(mode, "mode", "nearest") ||
binder.customOpNameStringAttr(
coordTfMode, "coordinate_transformation_mode", "half_pixel") ||
binder.s64IntegerAttr(antialias, "antialias", 0) ||
binder.s64IntegerAttr(exclude_outside, "exclude_outside", 0) ||
binder.f32FloatAttr(extrapolation_value, "extrapolation_value",
0.0) ||
binder.customOpNameStringAttr(nearest_mode, "nearest_mode",
"round_prefer_floor"))
return failure();
if (antialias != 0) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support not present for antialias attribute");
}
if (exclude_outside != 0) {
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: support not present for "
"exclude_outside attribute");
}
if (extrapolation_value != 0.0) {
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: support not present for "
"extrapolation_value attribute");
}
if (coordTfMode == "tf_crop_and_resize")
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: coordinate transformation mode: "
"tf_crop_and_resize");
if (mode == "nearest" && coordTfMode != "asymmetric" &&
coordTfMode != "half_pixel") {
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: support not present for coord tf mode "
"except asymmetric and half_pixel");
}
unsigned rank = dyn_cast<Torch::ValueTensorType>(operands[0].getType())
.getSizes()
.size();
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
Value cstTrue =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
Value modeStrValue;
Value scalesValueList = noneVal;
Value sizesValueList = noneVal;
Value alignCorners =
coordTfMode == "align_corners" ? cstTrue : cstFalse;
if (mode == "cubic") {
return rewriter.notifyMatchFailure(binder.op,
"unimplemented: bicubic mode");
}
        // supported modes:
        // bilinear (half_pixel), bilinear with align_corners,
        // bilinear_pytorch_half_pixel, bilinear_asymmetric,
        // nearest (asymmetric), nearest with align_corners,
        // nearest_half_pixel, nearest_pytorch_half_pixel
if (mode == "linear") {
std::string modeStr;
switch (rank) {
case 3:
modeStr = "linear";
break;
case 4:
modeStr = "bilinear";
break;
case 5:
modeStr = "trilinear";
break;
default:
return failure();
}
// Confusingly enough, the default coordTfMode for pytorch bilinear
// mode is apparently half_pixel, NOT pytorch_half_pixel
if (coordTfMode != "half_pixel" && coordTfMode != "align_corners")
modeStr = (modeStr + "_") + coordTfMode;
modeStrValue =
rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), modeStr);
}
if (mode == "nearest") {
std::string modeStr = "nearest";
// The default coordTfMode for pytorch with mode = nearest is
// apparently asymmetric
if (coordTfMode != "asymmetric" && coordTfMode != "align_corners")
modeStr = (modeStr + "_") + coordTfMode;
if (nearest_mode != "floor" && nearest_mode != "")
modeStr = modeStr + "," + nearest_mode;
modeStrValue =
rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), modeStr);
}
if (operands.size() < 4) {
Value scaleOperand = operands[2];
scalesValueList = getValueList(binder, rewriter, scaleOperand);
sizesValueList = noneVal;
} else {
Value sizeOperand = operands[3];
scalesValueList = noneVal;
sizesValueList = getValueList(binder, rewriter, sizeOperand);
}
if (isa<Torch::NoneType>(scalesValueList.getType()) &&
isa<Torch::NoneType>(sizesValueList.getType())) {
return rewriter.notifyMatchFailure(binder.op, "unknown scaling mode");
}
rewriter
.replaceOpWithNewOp<Torch::Aten__InterpolateSizeListScaleListOp>(
binder.op, resultType, operands[0], sizesValueList,
scalesValueList, modeStrValue,
/* AnyTorchOptionalBoolType:$align_corners */ alignCorners,
/* AnyTorchOptionalBoolType:$recompute_scale_factor */ noneVal,
/*Torch_BoolType:$antialias*/ cstFalse);
return success();
});
patterns.onOp(
"RoiAlign", 16, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
// operands = input, rois, batch_indices
SmallVector<Value> operands;
std::string coordTfMode, mode;
int64_t outHInt, outWInt, samplingRatioInt;
float spatialScaleFloat;
Torch::ValueTensorType resultType;
if (binder.tensorOperands(operands, 3) ||
binder.customOpNameStringAttr(
coordTfMode, "coordinate_transformation_mode", "half_pixel") ||
binder.customOpNameStringAttr(mode, "mode", "avg") ||
binder.s64IntegerAttr(outHInt, "output_height", 1) ||
binder.s64IntegerAttr(outWInt, "output_width", 1) ||
binder.s64IntegerAttr(samplingRatioInt, "sampling_ratio", 0) ||
binder.f32FloatAttr(spatialScaleFloat, "spatial_scale", 1.0f) ||
binder.tensorResultType(resultType))
return failure();
Value input = operands[0];
Value rois = operands[1];
Value batchIndices = operands[2];
// the torchvision roi_pool op does not support these features:
if (mode == "max" &&
(coordTfMode != "half_pixel" || samplingRatioInt != 0))
return rewriter.notifyMatchFailure(
binder.op, "unsupported: roi max pooling without default "
"coordTfMode and sampling_ratio");
Location loc = binder.getLoc();
        // concatenate the batchIndices to the rois to get rois as a
        // num_rois x 5 tensor. The batchIndices tensor is an int64 tensor, and
        // needs to be converted to float before concatenation.
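        // Each resulting row is then [batch_index, x1, y1, x2, y2], the
        // single-tensor box layout expected by torchvision's roi ops.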
auto roisType = dyn_cast<Torch::ValueTensorType>(rois.getType());
if (!roisType || !roisType.hasSizes())
return failure();
Value cstDim = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
FailureOr<Value> unsqueezeIndices =
Torch::unsqueezeTensor(rewriter, binder.op, batchIndices, cstDim);
if (failed(unsqueezeIndices))
return failure();
batchIndices = unsqueezeIndices.value();
auto batchIndicesType =
cast<Torch::ValueTensorType>(batchIndices.getType());
Value dTypeInt =
Torch::getDtypeIntValueForType(rewriter, loc, roisType.getDtype());
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstFalse =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
Value newBatchIndices = rewriter.create<Torch::AtenToDtypeOp>(
loc,
batchIndicesType.getWithSizesAndDtype(
batchIndicesType.getOptionalSizes(),
roisType.getOptionalDtype()),
batchIndices, dTypeInt, cstFalse, cstFalse, none);
SmallVector<int64_t> roiSizes(roisType.getSizes());
roiSizes.back() = 5;
auto catType = rewriter.getType<Torch::ValueTensorType>(
roiSizes, roisType.getDtype());
Type listElemType =
roisType.getWithSizesAndDtype(/*optionalSizes=*/std::nullopt,
/*optionalDtype=*/nullptr);
Type listType = Torch::ListType::get(listElemType);
Value tensorList = rewriter.create<Torch::PrimListConstructOp>(
binder.op->getLoc(), listType, ValueRange{newBatchIndices, rois});
Value newRois =
rewriter.create<Torch::AtenCatOp>(loc, catType, tensorList, cstDim);
// make constants from attributes
Value cstSpatialScale = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getF64FloatAttr(spatialScaleFloat));
Value pooledHeight = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(outHInt));
Value pooledWidth = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(outWInt));
// this is for consistency with the default pytorch sampling ratio value
samplingRatioInt = (samplingRatioInt == 0) ? -1 : samplingRatioInt;
Value samplingRatio = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(samplingRatioInt));
bool aligned = coordTfMode == "half_pixel";
Value cstAligned = rewriter.create<Torch::ConstantBoolOp>(loc, aligned);
if (mode == "avg") {
rewriter.replaceOpWithNewOp<Torch::TorchvisionRoiAlignOp>(
binder.op, resultType, input, newRois, cstSpatialScale,
pooledHeight, pooledWidth, samplingRatio, cstAligned);
return success();
}
// mode == "max"
auto indicesType = resultType.getWithSizesAndDtype(
resultType.getOptionalSizes(), batchIndicesType.getDtype());
auto roiPool = rewriter.create<Torch::TorchvisionRoiPoolOp>(
loc, TypeRange{resultType, indicesType}, input, newRois,
cstSpatialScale, pooledHeight, pooledWidth);
rewriter.replaceOp(binder.op, roiPool.getResult(0));
return success();
});
patterns.onOp(
"SpaceToDepth", 1,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input;
int64_t blockSize;
std::string mode;
if (binder.tensorOperand(input) ||
binder.s64IntegerAttr(blockSize, "blocksize") ||
binder.customOpNameStringAttr(mode, "mode", "DCR") ||
binder.tensorResultType(resultType))
return failure();
auto inputTy = dyn_cast<Torch::BaseTensorType>(input.getType());
if (!inputTy || !inputTy.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op, "Expected input type having sizes");
}
SmallVector<int64_t> inputSizes{inputTy.getSizes()};
if (inputSizes.size() != 4) {
return rewriter.notifyMatchFailure(binder.op,
"Expected input rank to be 4");
}
Value b = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), input,
rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0)));
Value c = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), input,
rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1)));
Value h = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), input,
rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(2)));
Value w = rewriter.create<Torch::AtenSizeIntOp>(
binder.getLoc(), input,
rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(3)));
Value cstBlockSize = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(blockSize));
Value cstBlockSizeSquare = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(blockSize * blockSize));
Value hDivBlockSize = rewriter.create<Torch::AtenDivIntOp>(
binder.getLoc(), h, cstBlockSize);
Value wDivBlockSize = rewriter.create<Torch::AtenDivIntOp>(
binder.getLoc(), w, cstBlockSize);
hDivBlockSize = rewriter.create<Torch::AtenIntFloatOp>(binder.getLoc(),
hDivBlockSize);
wDivBlockSize = rewriter.create<Torch::AtenIntFloatOp>(binder.getLoc(),
wDivBlockSize);
// The implementation is as follows:
// tmp = np.reshape(
// x, [b, c, h // blocksize, blocksize, w // blocksize, blocksize]
// )
// tmp = np.transpose(tmp, [0, 3, 5, 1, 2, 4])
// y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize, w //
// blocksize])
Value reshapeSizesList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(input.getContext())),
llvm::SmallVector<Value>{b, c, hDivBlockSize, cstBlockSize,
wDivBlockSize, cstBlockSize});
int64_t hDivBlockSizeInt = inputSizes[2] == Torch::kUnknownSize
? Torch::kUnknownSize
: inputSizes[2] / blockSize;
int64_t wDivBlockSizeInt = inputSizes[3] == Torch::kUnknownSize
? Torch::kUnknownSize
: inputSizes[3] / blockSize;
SmallVector<int64_t, 6> reshapeSizesInt{inputSizes[0], inputSizes[1],
hDivBlockSizeInt, blockSize,
wDivBlockSizeInt, blockSize};
Value reshapedInput = rewriter.create<Torch::AtenReshapeOp>(
binder.getLoc(),
inputTy.getWithSizesAndDtype(reshapeSizesInt,
inputTy.getOptionalDtype()),
input, reshapeSizesList);
SmallVector<int64_t, 6> permuteDimsInt{0, 3, 5, 1, 2, 4};
Value permutedInput;
if (failed(createTorchPermuteOp(binder, rewriter, binder.getLoc(),
reshapedInput, permuteDimsInt,
permutedInput)))
return rewriter.notifyMatchFailure(
binder.op, "Failed to create Torch Permute op");
Value cMulBlockSizeSquare = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), c, cstBlockSizeSquare);
reshapeSizesList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(Torch::IntType::get(input.getContext())),
llvm::SmallVector<Value>{b, cMulBlockSizeSquare, hDivBlockSize,
wDivBlockSize});
rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(
binder.op, resultType, permutedInput, reshapeSizesList);
return success();
});
patterns.onOp(
"Shrink", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Location loc = binder.getLoc();
Torch::ValueTensorType resultType;
Value input;
float bias, lambd;
if (binder.tensorOperand(input) ||
binder.f32FloatAttr(bias, "bias", 0.0) ||
binder.f32FloatAttr(lambd, "lambd", 0.5) ||
binder.tensorResultType(resultType)) {
return failure();
}
Torch::ValueTensorType inputType =
cast<Torch::ValueTensorType>(input.getType());
if (!isa<mlir::FloatType>(inputType.getDtype()))
return rewriter.notifyMatchFailure(
binder.op, "unimplemented: non-floating point dtype");
Torch::ValueTensorType comparisonResultType =
rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>(inputType.getSizes()), rewriter.getI1Type());
        // The formula of this operator is: If x < -lambd, y = x + bias;
        // if x > lambd, y = x - bias; otherwise, y = 0.
// The implementation is based on the following algorithm:
// Shrink <bias,lambd>(input) => (output)
// {
// Lambd = Constant <value_float: float = @lambd> ()
// LambdCast = CastLike (Lambd, input)
// Bias = Constant <value_float: float = @bias> ()
// BiasCast = CastLike (Bias, input)
// Zero = Constant <value: tensor = float {0}> ()
// ZeroCast = CastLike (Zero, input)
// NegLmbda = Neg (LambdCast)
// InputLessThanNegLambda = Less (input, NegLmbda)
// InputAddBias = Add (input, BiasCast)
// InputSubBias = Sub (input, BiasCast)
// LambdaLessThanInput = Less (LambdCast, input)
// InputSubBiasOrZero = Where (LambdaLessThanInput, InputSubBias,
// ZeroCast) output = Where (InputLessThanNegLambda, InputAddBias,
// InputSubBiasOrZero)
// }
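        // As a quick illustration of the formula (values chosen arbitrarily):
        // with lambd = 0.5 and bias = 0.1, x = -2.0 maps to -1.9, x = 0.3
        // maps to 0.0, and x = 1.0 maps to 0.9.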
Value constLambd = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getFloatAttr(rewriter.getF64Type(), lambd));
Value constBias = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getFloatAttr(rewriter.getF64Type(), bias));
Value constZero = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getFloatAttr(rewriter.getF64Type(), 0.0));
Value constOne = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getFloatAttr(rewriter.getF64Type(), 1.0));
Value constNegLambd = rewriter.create<Torch::ConstantFloatOp>(
loc, rewriter.getFloatAttr(rewriter.getF64Type(), -lambd));
Value inputLTNegLambd = rewriter.create<Torch::AtenLtScalarOp>(
loc, comparisonResultType, input, constNegLambd);
Value inputPlusBias = rewriter.create<Torch::AtenAddScalarOp>(
loc, inputType, input, constBias, /*alpha=*/constOne);
Value inputSubBias = rewriter.create<Torch::AtenSubScalarOp>(
loc, inputType, input, constBias, /*alpha=*/constOne);
Value inputGTLambd = rewriter.create<Torch::AtenGtScalarOp>(
loc, comparisonResultType, input, constLambd);
Value inputSubBiasOrZero =
rewriter.create<Torch::AtenWhereScalarOtherOp>(
loc, resultType, inputGTLambd, inputSubBias, constZero);
rewriter.replaceOpWithNewOp<Torch::AtenWhereSelfOp>(
binder.op, resultType, inputLTNegLambd, inputPlusBias,
inputSubBiasOrZero);
return success();
});
patterns.onOp("SequenceAt", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value inputSequence, position;
if (binder.tensorListOperandAtIndex(inputSequence, 0) ||
binder.tensorOperandAtIndex(position, 1) ||
binder.tensorResultType(resultType))
return failure();
Value index = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
position);
rewriter.replaceOpWithNewOp<Torch::Aten__Getitem__TOp>(
binder.op, resultType, inputSequence, index);
return success();
});
patterns.onOp(
"SequenceEmpty", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ListType resultType;
int64_t dtypeIntOnnx;
if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) ||
binder.tensorListResultType(resultType))
return failure();
std::optional<int64_t> dtypeIntTorch =
onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx);
if (!dtypeIntTorch.has_value()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented support for the given dtype conversion");
}
Value constDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value()));
Value shapeList = createConstantIntList(binder, rewriter, {});
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value self = rewriter.create<Torch::AtenEmptyMemoryFormatOp>(
binder.op->getLoc(), resultType.getContainedType(), shapeList,
/*dtype=*/constDtype,
/*layout=*/cstNone,
/*device=*/cstNone, /*pinMemory=*/cstNone,
/*memoryFormat=*/cstNone);
rewriter.replaceOpWithNewOp<Torch::PrimListConstructOp>(
binder.op, resultType, llvm::SmallVector<Value>{self});
return success();
});
patterns.onOp(
"SequenceErase", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ListType resultType;
Value inputSequence, position;
if (binder.tensorListOperandAtIndex(inputSequence, 0) ||
binder.tensorListResultType(resultType))
return failure();
Value length = rewriter.create<Torch::AtenLenTOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), inputSequence);
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstOne = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
if (binder.op->getNumOperands() == 1) {
// If True, it means that the `position` arg is missing and
// the last tensor from the list has to be erased.
Value lengthMinusOne = rewriter.create<Torch::AtenSubIntOp>(
binder.getLoc(), length, cstOne);
rewriter.replaceOpWithNewOp<Torch::AtenSliceTOp>(
binder.op, resultType, inputSequence, /*start=*/cstNone,
/*end=*/lengthMinusOne, /*step=*/cstOne);
return success();
}
if (binder.tensorOperandAtIndex(position, 1))
return failure();
Value positionInt = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), position);
// Handling negative position value.
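        // E.g. for a sequence of length 4, position = -1 becomes 3: the
        // offset 1 * length is added only when the position is negative.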
Value cstZero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
Value isPositionNegative = rewriter.create<Torch::AtenLtIntOp>(
binder.getLoc(), positionInt, cstZero);
isPositionNegative = rewriter.create<Torch::AtenIntBoolOp>(
binder.getLoc(), isPositionNegative);
Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), isPositionNegative, length);
positionInt = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), positionInt, finalOffset);
Value listBeforePosition = rewriter.create<Torch::AtenSliceTOp>(
binder.getLoc(), resultType, inputSequence, /*start=*/cstNone,
/*end=*/positionInt, /*step=*/cstOne);
Value positionPlusOne = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), positionInt, cstOne);
Value listAfterPosition = rewriter.create<Torch::AtenSliceTOp>(
binder.getLoc(), resultType, inputSequence,
/*start=*/positionPlusOne,
/*end=*/length, /*step=*/cstOne);
rewriter.replaceOpWithNewOp<Torch::AtenAddTOp>(
binder.op, resultType, listBeforePosition, listAfterPosition);
return success();
});
patterns.onOp(
"SequenceInsert", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ListType resultType;
Value inputSequence, position, insertValue;
if (binder.tensorListOperandAtIndex(inputSequence, 0) ||
binder.tensorOperandAtIndex(insertValue, 1) ||
binder.tensorListResultType(resultType))
return failure();
        if (binder.op->getNumOperands() == 2) {
          // If true, it means that the `position` arg is missing and
          // the tensor has to be inserted at the end of the list.
          Value length = rewriter.create<Torch::AtenLenTOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(),
              inputSequence);
          rewriter.create<Torch::AtenInsertTOp>(binder.getLoc(), inputSequence,
                                                /*idx=*/length,
                                                /*el=*/insertValue);
          rewriter.replaceOp(binder.op, inputSequence);
          return success();
        }
if (binder.tensorOperandAtIndex(position, 2))
return failure();
Value positionInt = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), position);
rewriter.create<Torch::AtenInsertTOp>(binder.getLoc(), inputSequence,
/*idx=*/positionInt,
/*el=*/insertValue);
rewriter.replaceOp(binder.op, inputSequence);
return success();
});
patterns.onOp(
"SequenceMap", 17,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
llvm::SmallVector<Value> operands;
Torch::ListType resultType;
if (binder.tensorOperandsList(operands) || operands.size() == 0 ||
binder.tensorListResultType(resultType)) {
return failure();
}
Region *bodyRegion;
if (binder.getRegionAtIndex(bodyRegion, 0)) {
return rewriter.notifyMatchFailure(binder.op,
"Failed getting Body Region");
}
// construct an empty list, append results through the loop
auto resultTensorType =
dyn_cast<Torch::ValueTensorType>(resultType.getContainedType());
Value shapeList = createConstantIntList(binder, rewriter,
resultTensorType.getSizes());
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value self = rewriter.create<Torch::AtenEmptyMemoryFormatOp>(
binder.op->getLoc(), resultType.getContainedType(), shapeList,
/*dtype=*/cstNone, /*layout=*/cstNone, /*device=*/cstNone,
/*pinMemory=*/cstNone, /*memoryFormat=*/cstNone);
Value result = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(), resultType, llvm::SmallVector<Value>{self});
// create a for-like primLoopOp
// with the length of sequence as max iter_num
Value len = rewriter.create<Torch::AtenLenTOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), operands[0]);
auto cstTrue = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(true));
mlir::ImplicitLocOpBuilder b(binder.getLoc(), rewriter);
auto loop =
b.create<Torch::PrimLoopOp>(resultType, len, cstTrue, result);
rewriter.cloneRegionBefore(*bodyRegion, loop.getRegion(),
loop.getRegion().begin());
        // The primLoopOp loop body expects torch.int as its first argument.
        // Remove the original inputs from the region and use them from
        // outside instead.
loop.getRegion().front().insertArgument(0U, resultType,
binder.getLoc());
Value sequenceArg = loop.getRegion().front().getArgument(0);
loop.getRegion().front().insertArgument(
0U, rewriter.getType<Torch::IntType>(), binder.getLoc());
Value indexArg = loop.getRegion().front().getArgument(0);
        // get sequence[i] (and additionalInput[i]) in each iteration
rewriter.setInsertionPointToStart(&loop.getRegion().front());
for (size_t i = 0; i < operands.size(); i++) {
Value argInput = loop.getRegion().front().getArgument(2);
if (isa<Torch::ListType>(operands[i].getType())) {
auto tensorType = dyn_cast<Torch::ValueTensorType>(
dyn_cast<Torch::ListType>(operands[i].getType())
.getContainedType());
Value item = rewriter.create<Torch::Aten__Getitem__TOp>(
binder.getLoc(), tensorType, operands[i], indexArg);
argInput.replaceAllUsesWith(item);
} else {
argInput.replaceAllUsesWith(operands[i]);
}
loop.getRegion().eraseArgument(2);
}
// replace terminator
PatternRewriter::InsertionGuard guard(rewriter);
Operation *terminator = loop.getRegion().front().getTerminator();
rewriter.setInsertionPoint(terminator);
// update sequence input
auto terminatorOperands = terminator->getOperands();
Value append = rewriter.create<Torch::AtenAppendTOp>(
binder.getLoc(), resultType, sequenceArg, terminatorOperands[0]);
rewriter.replaceOpWithNewOp<Torch::PrimLoopConditionOp>(
terminator, cstTrue, append);
rewriter.replaceOp(binder.op, loop);
return success();
});
patterns.onOp(
"Upsample", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
std::string mode;
Value input, scales;
if (binder.tensorOperands(input, scales) ||
binder.customOpNameStringAttr(mode, "mode", "nearest") ||
binder.tensorResultType(resultType)) {
return failure();
}
if (mode != "nearest" && mode != "linear")
return rewriter.notifyMatchFailure(
binder.op, "unsupported interpolation mode other than nearest, "
"linear");
int64_t resultRank = resultType.getSizes().size();
if (resultRank > 5)
return rewriter.notifyMatchFailure(
binder.op, "supports upto 3d upsampling only");
Value scalesValueList = getValueList(binder, rewriter, scales);
if (mode == "linear") {
if (resultRank == 4)
mode = "bilinear";
if (resultRank == 5)
mode = "trilinear";
}
Value modeStrValue =
rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), mode);
Value cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(false));
rewriter
.replaceOpWithNewOp<Torch::Aten__InterpolateSizeListScaleListOp>(
binder.op, resultType, input, /*size=*/cstNone, scalesValueList,
modeStrValue,
/* AnyTorchOptionalBoolType:$align_corners */ cstNone,
/* AnyTorchOptionalBoolType:$recompute_scale_factor */ cstNone,
/*Torch_BoolType:$antialias*/ cstFalse);
return success();
});
patterns.onOp(
"STFT", 17, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // operands, in order: (signal, frameStep, window?, frameLength?)
SmallVector<Value> operands;
int64_t onesided;
Torch::ValueTensorType resultType;
if (binder.tensorOperandsList(operands) ||
binder.s64IntegerAttr(onesided, "onesided", 1) ||
binder.tensorResultType(resultType))
return failure();
Value signal = operands[0];
Value frameStep = operands[1];
auto signalTy = cast<Torch::ValueTensorType>(signal.getType());
if (!signalTy || !signalTy.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op, "Expected signal type having sizes");
}
auto signalShape = signalTy.getSizes();
        // ONNX and onnxruntime expect the signal to be rank-2 (real) or
        // rank-3 (with a trailing dimension of size 1 for real or 2 for
        // complex input). For reference:
        // https://github.com/onnx/onnx/blob/060589cb81dfb081ed912c9e722b15fe1dbc1a14/onnx/defs/math/defs.cc#L3475-L3477
if (signalShape.size() != 2 && signalShape.size() != 3) {
return rewriter.notifyMatchFailure(binder.op,
"signal has invalid shape.");
}
if (!resultType || !resultType.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op, "Expected result type having sizes");
}
auto resultShape = resultType.getSizes();
if (resultShape.size() != 4) {
return rewriter.notifyMatchFailure(binder.op,
"result has invalid shape.");
}
        // There are two possible cases for the optional inputs frameLength
        // and window: either 4 operands are passed with window being
        // !torch.none, or 3 operands are passed with window present and
        // frameLength absent. In the former case, we simply create a
        // rectangular window consisting of ones, and in the latter, we set
        // frameLength equal to signalShape[1] or windowShape[0], depending
        // upon whether window was present or not. Note that it is possible
        // for both window and frameLength to be none, which would mean that
        // either only two operands were passed, or, in the case of three
        // operands, window was passed in as none and frameLength was absent.
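        // To summarize the operand cases handled below (illustrative only):
        //   2 operands: (signal, frameStep)               -> no window, no frameLength
        //   3 operands: (signal, frameStep, window)       -> no frameLength
        //   4 operands: (signal, frameStep, window, frameLength), where either
        //               optional operand may still be !torch.none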
Value window = nullptr, frameLength = nullptr;
bool windowIsNone = true, frameLengthIsNone = true;
if (operands.size() == 3) {
window = operands[2];
windowIsNone = isa<Torch::NoneType>(window.getType());
}
if (operands.size() == 4) {
window = operands[2];
frameLength = operands[3];
windowIsNone = isa<Torch::NoneType>(window.getType());
frameLengthIsNone = isa<Torch::NoneType>(frameLength.getType());
}
ArrayRef<int64_t> windowShape;
if (!windowIsNone) {
windowShape =
cast<Torch::ValueTensorType>(window.getType()).getSizes();
if (windowShape.size() != 1) {
return rewriter.notifyMatchFailure(binder.op,
"window has invalid shape.");
}
}
if (frameLengthIsNone) {
if (windowIsNone) {
frameLength = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(signalShape[1]));
} else {
frameLength = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(windowShape[0]));
}
}
Value frameLengthItem;
if (!frameLengthIsNone || windowIsNone) {
frameLengthItem =
getItemOp<Torch::IntType>(binder, rewriter, frameLength);
} else {
frameLengthItem = frameLength;
}
Value frameStepItem =
getItemOp<Torch::IntType>(binder, rewriter, frameStep);
if (windowIsNone) {
auto onesResultTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>({-1}), signalTy.getDtype());
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value sizes = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
Torch::ListType::get(
Torch::IntType::get(binder.op->getContext())),
SmallVector<Value>{frameLengthItem});
window = rewriter.create<Torch::AtenOnesOp>(
binder.getLoc(), onesResultTy, sizes, none, none, none, none);
}
FailureOr<Type> complexDtype;
if (signalTy.getDtype().isBF16()) {
return rewriter.notifyMatchFailure(
binder.op,
"unimplemented: support for bfloat16 type is unimplemented.");
}
if (signalTy.getDtype().isF16()) {
complexDtype = Torch::getTypeForScalarType(
binder.op->getContext(),
torch::torch_upstream::ScalarType::ComplexHalf);
} else if (signalTy.getDtype().isF32()) {
complexDtype = Torch::getTypeForScalarType(
binder.op->getContext(),
torch::torch_upstream::ScalarType::ComplexFloat);
} else {
complexDtype = Torch::getTypeForScalarType(
binder.op->getContext(),
torch::torch_upstream::ScalarType::ComplexDouble);
}
auto complexSignalTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>({signalShape[0], signalShape[1]}),
complexDtype.value());
// The onnx STFT op always passes in a float input, and if the input
// is intended to be complex, its shape will be [batch][length][2],
// where [...][0] is the real component, and [...][1] is the complex
// component. This complex input has to be made torch compatible before
// being passed into torch.stft, so it is necessary to call
// AtenViewAsComplexOp. In case of real input, the shape of the signal
// will be [batch][length] or [batch][length][1], and therefore it will
// have to be squeezed at dim=2 in the latter case, before being passed
// into torch.stft.
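        // E.g. a complex signal of shape [batch, signal_length, 2] becomes a
        // complex-valued tensor of shape [batch, signal_length], while a real
        // signal of shape [batch, signal_length, 1] is squeezed at dim=2 to
        // [batch, signal_length] before the torch.stft call.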
if (signalShape.size() == 3) {
if (signalShape[2] == 2) {
signal = rewriter.create<Torch::AtenViewAsComplexOp>(
binder.getLoc(), complexSignalTy, signal);
} else {
Value two = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(2));
auto newSignalTy = signalTy.getWithSizesAndDtype(
ArrayRef<int64_t>({signalShape[0], signalShape[1]}),
signalTy.getDtype());
signal = rewriter.create<Torch::AtenSqueezeDimOp>(
binder.getLoc(), newSignalTy, signal, two);
}
}
// In case the window is not given, we use frameLength
// as the length of the window.
Value windowLen;
if (!windowIsNone) {
windowLen = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(windowShape[0]));
} else {
windowLen = frameLengthItem;
}
Value falseVal =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
Value trueVal =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
auto stftTy = complexSignalTy.getWithSizesAndDtype(
ArrayRef<int64_t>({resultShape[0], resultShape[2], resultShape[1]}),
complexSignalTy.getDtype());
// After torch.stft is called and the result is stored into the value
// stft, there is one thing to note: The resultType for the onnx op
// will have shape [batch][num_frames][length][2], while the shape of
// stft will be [batch][length][num_frames]. Before the value is
// converted to real through torch.view_as_real, we must permute the
// shape of stft to match the shape of resultType. Also, it is
// immaterial whether torch.view_as_real is called after or before the
// permutation; both outputs will be equivalent.
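        // E.g. for an onnx result shape [batch, num_frames, length, 2],
        // torch.stft produces [batch, length, num_frames]; permuting with
        // dims (0, 2, 1) gives [batch, num_frames, length], and
        // view_as_real then appends the trailing dimension of size 2.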
Value stft = rewriter.create<Torch::AtenStftOp>(
binder.getLoc(), stftTy, signal, frameLengthItem, frameStepItem,
windowLen, window, falseVal, onesided ? trueVal : falseVal,
trueVal);
auto permuteStftTy = complexSignalTy.getWithSizesAndDtype(
ArrayRef<int64_t>({resultShape[0], resultShape[1], resultShape[2]}),
complexSignalTy.getDtype());
Value permuteDims = createConstantIntList(binder, rewriter, {0, 2, 1});
Value permutedStft = rewriter.create<Torch::AtenPermuteOp>(
binder.getLoc(), permuteStftTy, stft, permuteDims);
rewriter.replaceOpWithNewOp<Torch::AtenViewAsRealOp>(
binder.op, resultType, permutedStft);
return success();
});
patterns.onOp(
"ReverseSequence", 10,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value input, sequenceLens;
int64_t batchAxis, timeAxis;
if (binder.tensorOperandAtIndex(input, 0) ||
binder.tensorOperandAtIndex(sequenceLens, 1) ||
binder.s64IntegerAttr(batchAxis, "batch_axis", 1) ||
binder.s64IntegerAttr(timeAxis, "time_axis", 0) ||
binder.tensorResultType(resultType))
return failure();
auto inputTy = cast<Torch::ValueTensorType>(input.getType());
SmallVector<int64_t> inputShape(inputTy.getSizes());
auto dtype = resultType.getDtype();
Value cstZero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
Value cstOne = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
Value batchAxisVal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(batchAxis));
Value timeAxisVal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(timeAxis));
SmallVector<int64_t> sliceShape(inputShape);
sliceShape[batchAxis] = 1;
auto sliceType =
rewriter.getType<Torch::ValueTensorType>(sliceShape, dtype);
SmallVector<int64_t> flipShape(sliceShape);
flipShape[timeAxis] = Torch::kUnknownSize;
auto flipType =
rewriter.getType<Torch::ValueTensorType>(flipShape, dtype);
auto scalarTensorType = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>{1}, rewriter.getIntegerType(64, /*signed*/ 1));
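        // E.g. (with the default batch_axis = 1, time_axis = 0) an input
        // [[1, 2], [3, 4], [5, 6]] with sequence_lens = [2, 3] reverses the
        // first 2 time steps of batch 0 and the first 3 time steps of
        // batch 1, producing [[3, 6], [1, 4], [5, 2]].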
for (int i = 0; i < inputShape[batchAxis]; i++) {
// slice i iterating on batch axis
Value k = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(i));
Value end =
rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), k, cstOne);
Value sliceBatch = rewriter.create<Torch::AtenSliceTensorOp>(
binder.getLoc(), sliceType, input, batchAxisVal, k, end, cstOne);
// get sequence length and slice the reversing part
Value kTensor = rewriter.create<Torch::PrimNumToTensorScalarOp>(
binder.getLoc(), scalarTensorType, k);
Value sel = rewriter.create<Torch::AtenIndexSelectOp>(
binder.getLoc(), scalarTensorType, sequenceLens, cstZero,
kTensor);
Value len = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), sel);
Value sliceTime = rewriter.create<Torch::AtenSliceTensorOp>(
binder.getLoc(), flipType, sliceBatch, timeAxisVal, cstZero, len,
cstOne);
// flip the sliced reversing tensor
Value dims = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>()),
SmallVector<Value>{timeAxisVal});
Value flip = rewriter.create<Torch::AtenFlipOp>(
binder.getLoc(), flipType, sliceTime, dims);
        // embed the reversed slice back into the input
Value embedTime = rewriter.create<Torch::AtenSliceScatterOp>(
binder.getLoc(), sliceType, sliceBatch, flip, timeAxisVal,
/*start=*/cstZero, /*end=*/len, /*step=*/cstOne);
input = rewriter.create<Torch::AtenSliceScatterOp>(
binder.getLoc(), resultType, input, embedTime, batchAxisVal,
/*start=*/k, /*end=*/end, /*step=*/cstOne);
}
rewriter.replaceOp(binder.op, input);
return success();
});
patterns.onOp(
"ScatterND", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Torch::ValueTensorType resultType;
Value data, indices, updates;
std::string reduction;
if (binder.tensorOperandAtIndex(data, 0) ||
binder.tensorOperandAtIndex(indices, 1) ||
binder.tensorOperandAtIndex(updates, 2) ||
binder.tensorResultType(resultType))
return failure();
        // Prior to version 16 of ScatterND, the reduction attribute was not
        // supported. Setting it to "none" for those versions.
if (binder.customOpNameStringAttr(reduction, "reduction", "none")) {
reduction = "none";
}
// Map onnx reduction type to torch reduction type.
if (reduction == "add") {
reduction = "sum";
} else if (reduction == "mul") {
reduction = "prod";
} else if (reduction == "max") {
reduction = "amax";
} else if (reduction == "min") {
reduction = "amin";
} else if (reduction != "none") {
return rewriter.notifyMatchFailure(
binder.op, "expects reduction to be one of add, mul, max, min, "
"none(default)");
}
Location loc = binder.getLoc();
auto dataTy = dyn_cast<Torch::ValueTensorType>(data.getType());
auto indicesTy = dyn_cast<Torch::ValueTensorType>(indices.getType());
auto updatesTy = dyn_cast<Torch::ValueTensorType>(updates.getType());
if (!dataTy || !indicesTy || !updatesTy || !dataTy.hasSizes() ||
!indicesTy.hasSizes() || !updatesTy.hasSizes())
return failure();
// step 1. Get shapes and ranks of data, indices and updates.
// The last dimension of indices is expected to be static.
ArrayRef<int64_t> dataShape = dataTy.getSizes();
int64_t dataRank = dataShape.size();
ArrayRef<int64_t> updatesShape = updatesTy.getSizes();
int64_t updatesRank = updatesShape.size();
ArrayRef<int64_t> indicesShape = indicesTy.getSizes();
int64_t indicesRank = indicesShape.size();
int64_t indicesLastDim = indicesShape.back();
// Given data tensor of rank r >= 1, indices tensor of rank q >= 1, and
// updates tensor of rank q + r - indices_shape[-1] - 1, the output is
// produced by creating a copy of the input data, and then updating
// its value to values specified by updates at specific index positions
// specified by indices. Its output shape is the same as the shape of
// data.
// indices_shape[-1] must be static to have deterministic ranks.
if (dataRank < 1 || indicesRank < 1 || updatesRank < 1)
return rewriter.notifyMatchFailure(
binder.op, "expected data, indices and updates rank to be >= 1");
if (indicesLastDim == Torch::kUnknownSize || indicesLastDim <= 0)
return rewriter.notifyMatchFailure(
binder.op, "expected last dimension of indices to be static and "
"greater than zero");
// step 2. Get dimension list of data.
SmallVector<Value> dataDims;
for (int64_t i = 0; i < dataRank; ++i) {
Value k = rewriter.create<Torch::ConstantIntOp>(loc, i);
Value dataDim = rewriter.create<Torch::AtenSizeIntOp>(loc, data, k);
dataDims.push_back(dataDim);
}
// step 3. Get dimension list of indices.
Value constZero = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(0));
Value constOne = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(1));
SmallVector<Value> indicesDimsMinusOne;
Value indicesFlattenDim = constOne;
for (int64_t i = 0; i < indicesRank - 1; ++i) {
Value k = rewriter.create<Torch::ConstantIntOp>(loc, i);
Value indicesDim =
rewriter.create<Torch::AtenSizeIntOp>(loc, indices, k);
indicesDimsMinusOne.push_back(indicesDim);
indicesFlattenDim = rewriter.create<Torch::AtenMulIntOp>(
loc, indicesFlattenDim, indicesDim);
}
ArrayRef<int64_t> indicesShapeMinusOne = indicesShape.drop_back();
        // Algorithm: We cannot directly perform torch.scatter as it requires
        // the ranks of data (`r`), indices (`q`), and updates to be the same.
        // So we will perform collapse and expand operations to match the
        // ranks of data, indices, and updates (making sure the semantics of
        // onnx.scatter_nd are preserved), then perform the torch.scatter
        // operation, and later unflatten the scatter result to match the
        // onnx.scatter_nd output.
        // For example, assuming
        // indices is of shape (4, 5, 3, 2), data is (4, 10, 11, 7, 4) and
        // updates is (4, 5, 3, 11, 7, 4). Firstly, modify indices to 1-D
        // indexing as the torch.scatter op supports only single dimensional
        // indexing (this algorithm would have been simpler if we could use a
        // torch op that supports indexing at multiple dimensions
        // simultaneously). 1-D indexed indices will be of shape (4, 5, 3, 1);
        // now pad it with unit dims for the trailing `r - indices_shape[-1]`
        // data dims, i.e. reshape it to the shape (4, 5, 3, 1, 1, 1). The
        // next step is to flatten+expand the indices and flatten the data to
        // (60, 11, 7, 4) and (40, 11, 7, 4) shapes respectively, and then
        // perform the torch.scatter operation. After the scatter operation,
        // unflatten the first dimension of the result to (4, 10, 11, 7, 4),
        // which is our required result.
// step 4. Convert indices_shape[-1] dimensional indexing to 1D
// indexing.
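        // The loop below linearizes the leading index components row-major;
        // e.g. for indices_shape[-1] == 3 the 1-D index becomes
        // idx0 * d1 * d2 + idx1 * d2 + idx2 (with di = dataDims[i]), after
        // first wrapping any negative component by the corresponding data
        // dimension.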
Value sliceDim = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(indicesRank - 1));
SmallVector<int64_t> indicesSliceShape(indicesShapeMinusOne);
indicesSliceShape.push_back(1);
auto indicesSliceTy = rewriter.getType<Torch::ValueTensorType>(
indicesSliceShape, indicesTy.getOptionalDtype());
Value start = constZero;
Value updatedIndices;
for (int64_t i = 0; i < indicesLastDim; ++i) {
Value end = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(i + 1));
Value indicesSlice = rewriter.create<Torch::AtenSliceTensorOp>(
loc, indicesSliceTy, indices, sliceDim, start, end,
/*step=*/constOne);
start = end;
// Apply bounds checking on the indices slice.
auto boolTy = rewriter.getType<Torch::ValueTensorType>(
indicesSliceShape, rewriter.getI1Type());
Value lt = rewriter.create<Torch::AtenLtScalarOp>(
loc, boolTy, indicesSlice, constZero);
Value add = rewriter.create<Torch::AtenAddScalarOp>(
loc, indicesSliceTy, indicesSlice, dataDims[i],
/*alpha=*/constOne);
indicesSlice = rewriter.create<Torch::AtenWhereSelfOp>(
loc, indicesSliceTy, lt, add, indicesSlice);
if (i == 0) {
updatedIndices = indicesSlice;
continue;
}
updatedIndices = rewriter.create<Torch::AtenAddTensorOp>(
loc, indicesSliceTy, indicesSlice, updatedIndices, dataDims[i]);
}
// step 5. Compute all the required result types here.
SmallVector<int64_t> reshapeIndicesShape(indicesShapeMinusOne);
SmallVector<Value> reshapeIndicesDims(indicesDimsMinusOne);
        // Determine the collapsed dim size of indices (indices_shape[-1] is
        // not part of the collapsing as it was already removed by the 1-D
        // indexing above).
SmallVector<int64_t> flattenIndicesShape;
auto indicesCt = 1;
for (int64_t i = 0; i < indicesRank - 1; ++i) {
if (indicesShape[i] == Torch::kUnknownSize) {
indicesCt = Torch::kUnknownSize;
break;
}
indicesCt *= indicesShape[i];
}
flattenIndicesShape.push_back(indicesCt);
// Compute the shape of expand op.
SmallVector<Value> expandIndicesDims;
expandIndicesDims.push_back(indicesFlattenDim);
SmallVector<int64_t> expandIndicesShape;
expandIndicesShape.push_back(indicesCt);
// Determine the collapsed dim size of data.
SmallVector<int64_t> flattenDataShape;
auto dataCt = 1;
for (int64_t i = 0; i < indicesLastDim; ++i) {
if (dataShape[i] == Torch::kUnknownSize) {
dataCt = Torch::kUnknownSize;
break;
}
dataCt *= dataShape[i];
}
flattenDataShape.push_back(dataCt);
// Determine the collapsed dim size of updates.
SmallVector<int64_t> flattenUpdatesShape;
auto updatesCt = 1;
for (int64_t i = 0; i < indicesRank - 1; ++i) {
if (updatesShape[i] == Torch::kUnknownSize) {
updatesCt = Torch::kUnknownSize;
break;
}
updatesCt *= updatesShape[i];
}
flattenUpdatesShape.push_back(updatesCt);
flattenUpdatesShape.insert(flattenUpdatesShape.end(),
updatesShape.begin() + indicesRank - 1,
updatesShape.end());
// Append `r-indices_shape[-1]` unit or data dims appropriately to all
// result types.
for (int64_t i = indicesLastDim; i < dataRank; ++i) {
reshapeIndicesShape.push_back(1);
flattenIndicesShape.push_back(1);
flattenDataShape.push_back(dataShape[i]);
expandIndicesShape.push_back(dataShape[i]);
reshapeIndicesDims.push_back(constOne);
expandIndicesDims.push_back(dataDims[i]);
}
// step 6. Reshape 1-D indexed indices to match the rank of flattened
// data by inserting unit dimensions.
auto intListTy = rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>());
Value reshapeIndicesSizeList =
rewriter.create<Torch::PrimListConstructOp>(loc, intListTy,
reshapeIndicesDims);
auto reshapeIndicesTy = rewriter.getType<Torch::ValueTensorType>(
reshapeIndicesShape, indicesTy.getOptionalDtype());
Value reshapedIndices = rewriter.create<Torch::AtenViewOp>(
loc, reshapeIndicesTy, updatedIndices, reshapeIndicesSizeList);
// step 7. Flatten `q-1` dimensions of the indices and updates.
auto flattenIndicesTy = rewriter.getType<Torch::ValueTensorType>(
flattenIndicesShape, indicesTy.getOptionalDtype());
auto flattenUpdatesTy = rewriter.getType<Torch::ValueTensorType>(
flattenUpdatesShape, updatesTy.getOptionalDtype());
Value flattenedIndices = reshapedIndices;
Value flattenedUpdates = updates;
if (indicesRank == 1) {
flattenedIndices = rewriter.create<Torch::AtenUnsqueezeOp>(
loc, flattenIndicesTy, reshapedIndices, constZero);
flattenedUpdates = rewriter.create<Torch::AtenUnsqueezeOp>(
loc, flattenUpdatesTy, updates, constZero);
} else if (indicesRank > 1) {
Value endDim = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(indicesRank - 2));
flattenedIndices = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
loc, flattenIndicesTy, reshapedIndices, constZero, endDim);
flattenedUpdates = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
loc, flattenUpdatesTy, updates, constZero, endDim);
}
// step 8. Expand `r-indices_shape[-1]` dims of flattened indices.
auto expandIndicesTy = rewriter.getType<Torch::ValueTensorType>(
expandIndicesShape, indicesTy.getOptionalDtype());
Value expandIndicesSizeList =
rewriter.create<Torch::PrimListConstructOp>(loc, intListTy,
expandIndicesDims);
Value constFalse = rewriter.create<Torch::ConstantBoolOp>(
loc, rewriter.getType<Torch::BoolType>(),
rewriter.getBoolAttr(false));
Value expandedIndices = rewriter.create<Torch::AtenExpandOp>(
loc, expandIndicesTy, flattenedIndices, expandIndicesSizeList,
/*implicit=*/constFalse);
// step 9. Flatten indices_shape[-1] dimensions of data.
auto flattenDataTy = rewriter.getType<Torch::ValueTensorType>(
flattenDataShape, dataTy.getOptionalDtype());
Value endDim = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(indicesLastDim - 1));
Value flattenedData = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
loc, flattenDataTy, data, constZero, endDim);
// step 10. Now we have flattenedData, expandedIndices and
// flattenedUpdates of same rank to perform scatter operation.
auto scatterTy = rewriter.getType<Torch::ValueTensorType>(
flattenDataShape, dataTy.getOptionalDtype());
Value scatter;
if (reduction == "none") {
scatter = rewriter.create<Torch::AtenScatterSrcOp>(
loc, scatterTy, flattenedData, /*axis=*/constZero,
expandedIndices, flattenedUpdates);
} else {
Value cstReduction =
rewriter.create<Torch::ConstantStrOp>(loc, reduction);
Value constTrue = rewriter.create<Torch::ConstantBoolOp>(
loc, rewriter.getType<Torch::BoolType>(),
rewriter.getBoolAttr(true));
scatter = rewriter.create<Torch::AtenScatterReduceTwoOp>(
loc, scatterTy, flattenedData, /*axis=*/constZero,
expandedIndices, flattenedUpdates, cstReduction,
/*include_self=*/constTrue);
}
// step 11. Unflatten the collapsed data dims of scatter result.
if (indicesLastDim == 1) {
rewriter.replaceOp(binder.op, scatter);
return success();
}
Value unflattenSizeList = rewriter.create<Torch::PrimListConstructOp>(
loc, intListTy, dataDims);
rewriter.replaceOpWithNewOp<Torch::AtenUnflattenIntOp>(
binder.op, resultType, scatter, constZero, unflattenSizeList);
return success();
});
  // split to sequence
  // Arguments:
  // - input: the tensor to split
  // - split (optional): length of each output
  // Attributes:
  // - axis: the axis along which to split the input
  // - keepdims: whether to keep the split dimension or not. Ignored when
  //   'split' is specified.
  // Outputs:
  // - outputs: sequence of tensors
  //
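  // E.g. splitting a [6, 4] tensor along axis 0 with split = [2, 4] yields a
  // sequence of two tensors with shapes [2, 4] and [4, 4].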
patterns.onOp(
"SplitToSequence", 11,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
Value self;
Value split;
int64_t axis;
int64_t keepdims;
Torch::ListType resultType;
if (binder.op->getNumOperands() == 1)
          return rewriter.notifyMatchFailure(
              binder.op, "number of operands should be two; the keepdims "
                         "attribute is not yet implemented");
if (binder.tensorOperandAtIndex(self, 0) ||
binder.tensorListResultType(resultType) ||
binder.s64IntegerAttr(keepdims, "keepdims", 1) ||
binder.tensorOperandAtIndex(split, 1) ||
binder.s64IntegerAttr(axis, "axis", 0))
          return rewriter.notifyMatchFailure(
              binder.op,
              "not converting to AtenSplitToSequenceOp due to unsupported "
              "inputs");
Value axisValue = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getI64IntegerAttr(axis));
auto splitTy = cast<Torch::ValueTensorType>(split.getType());
if (!splitTy || !splitTy.hasSizes())
return failure();
auto splitSizes = splitTy.getSizes();
unsigned splitDim = splitTy.getSizes().size();
if (splitDim > 1)
return rewriter.notifyMatchFailure(
binder.op, "Split should be scalar or 1-D Tensor ");
if (splitDim == 1) {
if (splitSizes[0] == Torch::kUnknownSize) {
return rewriter.notifyMatchFailure(
binder.op, "Dynamic shapes for Split is not yet supported");
} else if (splitSizes[0] <=
1) { // dealing with 1/0 element in 1-D tensor
Value splitInt = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), split);
rewriter.replaceOpWithNewOp<Torch::AtenSplitTensorOp>(
binder.op, resultType, self, splitInt, axisValue);
return success();
} else {
            // Handle multiple elements in split
Value shapeList =
createConstantIntList(binder, rewriter, splitSizes);
rewriter.replaceOpWithNewOp<Torch::AtenSplitSizesOp>(
binder.op, resultType, self, shapeList, axisValue);
return success();
}
} else if (splitDim == 0) { // Handle 0-D tensor
Value splitInt = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(), split);
rewriter.replaceOpWithNewOp<Torch::AtenSplitTensorOp>(
binder.op, resultType, self, splitInt, axisValue);
return success();
} else {
return rewriter.notifyMatchFailure(
binder.op, "Handling of this kind of inputs is not there");
}
});
patterns.onOp(
"Unique", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Value input;
int64_t axis, sorted;
SmallVector<Type> resultTypes;
if (binder.tensorOperand(input) ||
binder.s64IntegerAttr(sorted, "sorted", 1) ||
binder.tensorResultTypes(resultTypes))
return failure();
Value zero = rewriter.create<Torch::ConstantIntOp>(binder.getLoc(), 0);
auto inputTy = cast<Torch::ValueTensorType>(input.getType());
if (!inputTy.hasSizes()) {
return rewriter.notifyMatchFailure(
binder.op, "Expected input type to have sizes");
}
auto inputShape = inputTy.getSizes();
int64_t inputDim = static_cast<int64_t>(inputShape.size());
Value axisVal;
SmallVector<int64_t> outputTensorSizes(inputDim);
bool axisWasNone;
if (!binder.optionalS64IntegerAttr(axis, "axis")) {
if (axis < -1 * inputDim || axis > inputDim - 1)
return rewriter.notifyMatchFailure(binder.op,
"invalid value for axis");
axisVal = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(axis));
axisWasNone = false;
} else {
axisVal = zero;
axisWasNone = true;
}
Value sortedVal = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(sorted));
Value trueVal =
rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
        // The shape of inverse_indices is the same as the input shape, but
        // resultTypes[2] must be used to avoid a live value after conversion.
Torch::ValueTensorType outputTy;
outputTy = cast<Torch::ValueTensorType>(resultTypes[0]);
Torch::ValueTensorType countsTy =
cast<Torch::ValueTensorType>(resultTypes[3]);
Torch::ValueTensorType inverseTy =
cast<Torch::ValueTensorType>(resultTypes[2]);
if (axisWasNone) {
int64_t inputNumel = 1;
for (auto elem : inputShape) {
if (elem == Torch::kUnknownSize) {
return rewriter.notifyMatchFailure(
binder.op,
"Expected all sizes in input shape to be statically known");
}
inputNumel *= elem;
}
auto flattenResultTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef({inputNumel}), inputTy.getDtype());
Value negativeOne =
rewriter.create<Torch::ConstantIntOp>(binder.getLoc(), -1);
input = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
binder.getLoc(), flattenResultTy, input, zero, negativeOne);
}
Torch::AtenUniqueDimOp intermResults =
rewriter.create<Torch::AtenUniqueDimOp>(
binder.getLoc(), outputTy, inverseTy, countsTy, input, axisVal,
sortedVal, trueVal, trueVal);
SmallVector<Value> uniqueResults = intermResults.getResults();
// Calculate the indices where each of the unique elements first
// appeared in the original input tensor. Also, the counts tensor and
// the indices tensor have the same Dtype, int64, so reuse that here.
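        // E.g. for input [2, 1, 1, 3, 2], the sorted unique values are
        // [1, 2, 3] with inverse [1, 0, 0, 2, 1]; scattering the flipped
        // positions [4, 3, 2, 1, 0] with the flipped inverse [1, 2, 0, 0, 1]
        // leaves the earliest occurrence per value, giving [1, 0, 3]
        // (assuming later writes win for duplicate indices, which the flip
        // relies on).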
auto arangeResultType = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>({inputShape[0]}), countsTy.getOptionalDtype());
Value inputDimZero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(inputShape[0]));
Value int64Type = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(4));
Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value perm = rewriter.create<Torch::AtenArangeOp>(
binder.getLoc(), arangeResultType, inputDimZero,
/*dtype=*/int64Type,
/*layout=*/noneVal, /*device=*/noneVal, /*pin_memory=*/noneVal);
// Inverse has the same shape as input, but the dtype is not the same.
Value flipDims = createConstantIntList(binder, rewriter, {0});
Value inverse = rewriter.create<Torch::AtenFlipOp>(
binder.getLoc(),
inputTy.getWithSizesAndDtype(inputShape, countsTy.getDtype()),
uniqueResults[1], flipDims);
perm = rewriter.create<Torch::AtenFlipOp>(
binder.getLoc(), cast<Torch::ValueTensorType>(perm.getType()), perm,
flipDims);
auto newInverseTy = rewriter.getType<Torch::ValueTensorType>(
ArrayRef<int64_t>({outputTy.getSizes()[0]}), countsTy.getDtype());
Value newInverseSize =
createConstantIntList(binder, rewriter, {outputTy.getSizes()[0]});
Value newInverse = rewriter.create<Torch::AtenNewEmptyOp>(
binder.getLoc(), newInverseTy, inverse, newInverseSize,
/*dtype=*/int64Type, /*layout=*/noneVal, /*device=*/noneVal,
/*pin_memory=*/noneVal);
Value firstOccurIndices = rewriter.create<Torch::AtenScatterSrcOp>(
binder.getLoc(), resultTypes[1], newInverse, zero, inverse, perm);
rewriter.replaceOp(binder.op, {uniqueResults[0], firstOccurIndices,
uniqueResults[1], uniqueResults[2]});
return success();
});
patterns.onOp(
"TfIdfVectorizer", 9,
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
llvm::SmallVector<int64_t> ngram_counts;
llvm::SmallVector<int64_t> ngram_indexes;
llvm::SmallVector<int64_t> pool_int64s;
std::string mode;
int64_t min_gram_length;
int64_t max_gram_length;
int64_t max_skip_count;
Value input;
Torch::ValueTensorType resultType;
if (binder.s64IntegerArrayAttr(ngram_counts, "ngram_counts", {}) ||
binder.s64IntegerArrayAttr(ngram_indexes, "ngram_indexes", {}) ||
binder.s64IntegerArrayAttr(pool_int64s, "pool_int64s", {}) ||
binder.customOpNameStringAttr(mode, "mode", "") ||
binder.s64IntegerAttr(min_gram_length, "min_gram_length", 0) ||
binder.s64IntegerAttr(max_gram_length, "max_gram_length", 0) ||
binder.s64IntegerAttr(max_skip_count, "max_skip_count", 0) ||
binder.tensorOperand(input) || binder.tensorResultType(resultType))
return failure();
if (mode != "TF")
          return rewriter.notifyMatchFailure(binder.op,
                                             "only TF mode is supported");
if (pool_int64s.size() == 0)
return rewriter.notifyMatchFailure(
binder.op, "pool_int64s empty, only integers supported");
auto inputType = dyn_cast<Torch::ValueTensorType>(input.getType());
auto inputSizes =
dyn_cast<Torch::ValueTensorType>(input.getType()).getSizes();
SmallVector<int64_t> inputShape(inputSizes);
        bool is_2d = inputShape.size() > 1;
if (is_2d && inputShape[0] == ShapedType::kDynamic)
return rewriter.notifyMatchFailure(
binder.op, "input batch dimension cannot be dynamic");
int batch_size = (is_2d) ? inputShape[0] : 1;
Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
Value zero = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
Value one = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(false));
auto intType = rewriter.getType<Torch::IntType>();
Value loopConditionTrue = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(true));
Type loopIndexType = intType;
// create a zero tensor for output
SmallVector<int64_t> resultShape(resultType.getSizes());
int64_t rank = resultShape.size();
SmallVector<Value> zerosShapeValues;
for (int j = 0; j < rank; j++) {
Value dimSize = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(resultShape[j]));
zerosShapeValues.push_back(dimSize);
}
Value zerosShapeList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::IntType>()),
zerosShapeValues);
Value output = rewriter.create<Torch::AtenZerosOp>(
binder.getLoc(), resultType, zerosShapeList, none, none, none,
none);
Value batchSize = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(batch_size));
auto batchLoop = rewriter.create<Torch::PrimLoopOp>(
binder.getLoc(), TypeRange({output.getType()}), batchSize,
loopConditionTrue, ValueRange({output}));
{
PatternRewriter::InsertionGuard guard(rewriter);
Block *batchLoopBody = rewriter.createBlock(
&batchLoop.getRegion(), batchLoop.getRegion().begin(),
TypeRange({loopIndexType, output.getType()}),
{binder.getLoc(), binder.getLoc()});
Value batchValue = batchLoopBody->getArgument(0);
Value output = batchLoopBody->getArgument(1);
Value outputForBatch = output;
Value inputSequence = input;
if (is_2d) {
// get input sequence from input (ex: [[0,1],[2,3]] -> [[0,1]] ->
// [0,1])
SmallVector<int64_t> inputSequenceShape;
inputSequenceShape.push_back(1);
inputSequenceShape.push_back(inputShape[1]);
auto inputSequenceType = rewriter.getType<Torch::ValueTensorType>(
inputSequenceShape, inputType.getOptionalDtype());
Value batchPlusOne = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), batchValue, one);
inputSequence = rewriter.create<Torch::AtenSliceTensorOp>(
binder.getLoc(), inputSequenceType, input, /*dim=*/zero,
batchValue, batchPlusOne, one);
inputSequence = rewriter.create<Torch::AtenSqueezeDimOp>(
binder.getLoc(),
Torch::ValueTensorType::get(binder.op->getContext(),
ArrayRef<int64_t>{inputShape[1]},
inputType.getOptionalDtype()),
inputSequence, zero);
SmallVector<int64_t> outputForBatchShape;
outputForBatchShape.push_back(1);
outputForBatchShape.push_back(resultShape[1]);
auto outputForBatchType = rewriter.getType<Torch::ValueTensorType>(
outputForBatchShape, resultType.getOptionalDtype());
outputForBatch = rewriter.create<Torch::AtenSliceTensorOp>(
binder.getLoc(), outputForBatchType, output,
/*dim=*/zero, batchValue, batchPlusOne, one);
outputForBatch = rewriter.create<Torch::AtenSqueezeDimOp>(
binder.getLoc(),
Torch::ValueTensorType::get(binder.op->getContext(),
ArrayRef<int64_t>{resultShape[1]},
resultType.getOptionalDtype()),
outputForBatch, zero);
}
          // ngram_counts[j] records the starting position within pool_int64s
          // of the ngrams of length j+1. The loop below iterates through the
          // different n-gram sizes.
          // ngram_i keeps track of which ngram we are looking at in the pool.
          // The frequency of this ngram will be stored in the output tensor
          // at the position ngram_indexes[ngram_i].
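          // E.g. with pool_int64s = [2, 3, 5, 4, 5, 6, 7, 8] and
          // ngram_counts = [0, 2], the 1-grams are {2} and {3}
          // (ngram_i = 0, 1) and the 2-grams are (5, 4), (5, 6), (7, 8)
          // (ngram_i = 2, 3, 4); each count lands at
          // output[ngram_indexes[ngram_i]].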
int ngram_i = 0;
for (int j = 0; j < (int)ngram_counts.size(); j++) {
int ngram_length = j + 1;
int start_idx = ngram_counts[j];
int end_idx = (j + 1) < (int)ngram_counts.size()
? ngram_counts[j + 1]
: pool_int64s.size();
if (j + 1 < min_gram_length || j + 1 > max_gram_length) {
// progress the ngram counter for the skipped (j+1)grams
ngram_i += (end_idx - start_idx) / ngram_length;
continue;
}
Value ngramLength = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(ngram_length));
for (int start = start_idx; start < end_idx;
start += ngram_length, ngram_i++) {
Value count = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(0));
// for 1-grams, there is no skipping (skip = gap between
// consecutive values in the n-gram pulled from the input
// sequence), so we default to skip_count_bound = 1 in that case
// to avoid repeating the same count multiple times.
int skip_count_bound =
(ngram_length == 1) ? 1 : (max_skip_count + 1);
Value skipCountBound = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), intType,
rewriter.getI64IntegerAttr(skip_count_bound));
// given a n-gram to search for, and the input sequence to search
// in, we need to count how many times that n-gram appears in the
// input for each skip between 0 and max_skip_count (inclusive).
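            // E.g. searching for the 2-gram (5, 6) in the input [5, 7, 6]:
            // with skip = 0 (stride 1) the candidate windows are (5, 7) and
            // (7, 6), so no match; with skip = 1 (stride 2) the only window
            // is (5, 6), which matches and increments the count.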
auto skipLoop = rewriter.create<Torch::PrimLoopOp>(
binder.getLoc(), TypeRange({count.getType()}), skipCountBound,
loopConditionTrue, ValueRange({count}));
{
PatternRewriter::InsertionGuard guard(rewriter);
Block *skipLoopBody = rewriter.createBlock(
&skipLoop.getRegion(), skipLoop.getRegion().begin(),
TypeRange({loopIndexType, count.getType()}),
{binder.getLoc(), binder.getLoc()});
Value skipCount = skipLoopBody->getArgument(0);
Value skipCountPlusOne = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), skipCount, one);
count = skipLoopBody->getArgument(1);
// max_start_index =
// inputSizes.back() - ((ngram_length - 1) * (skip_count + 1));
// the index one higher than the last possible start index
// without the input ngram going out of bounds
Value seqLen = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), intType,
rewriter.getI64IntegerAttr(inputSizes.back()));
Value ngramLengthMinusOne =
rewriter.create<Torch::AtenSubIntOp>(binder.getLoc(),
ngramLength, one);
Value ngramSkipLength = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), ngramLengthMinusOne, skipCountPlusOne);
Value maxStartIndex = rewriter.create<Torch::AtenSubIntOp>(
binder.getLoc(), seqLen, ngramSkipLength);
              // This loop extracts the n-gram with the given skip_count,
              // starting at each input index, from the input sequence and
              // increments the count if it matches the n-gram taken from
              // pool_int64s.
auto countLoop = rewriter.create<Torch::PrimLoopOp>(
binder.getLoc(), TypeRange({count.getType()}),
maxStartIndex, loopConditionTrue, ValueRange({count}));
{
PatternRewriter::InsertionGuard guard(rewriter);
Block *countLoopBody = rewriter.createBlock(
&countLoop.getRegion(), countLoop.getRegion().begin(),
TypeRange({loopIndexType, count.getType()}),
{binder.getLoc(), binder.getLoc()});
Value startInputIdx = countLoopBody->getArgument(0);
count = countLoopBody->getArgument(1);
// extract input ngram and compare to pool ngram
Torch::BaseTensorType inputSequenceType =
cast<Torch::BaseTensorType>(inputSequence.getType());
SmallVector<int64_t> selectSizes;
selectSizes.push_back(1);
Type selectResultType =
inputSequenceType.getWithSizesAndDtype(
llvm::ArrayRef(selectSizes),
inputSequenceType.getOptionalDtype());
Value foundNgram = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(1));
for (int i = 0; i < ngram_length; i++) {
Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
rewriter.getIntegerAttr(rewriter.getIntegerType(64),
i));
selectIndex = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), selectIndex, skipCountPlusOne);
selectIndex = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), selectIndex, startInputIdx);
Value inputExtract =
rewriter.create<Torch::AtenSelectIntOp>(
binder.getLoc(), selectResultType, inputSequence,
zero, selectIndex);
Value inputNgram_i = rewriter.create<Torch::AtenItemOp>(
binder.getLoc(), rewriter.getType<Torch::IntType>(),
inputExtract);
Value poolNgram_i = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(),
rewriter.getI64IntegerAttr(pool_int64s[start + i]));
Value isEqual = rewriter.create<Torch::AtenEqIntOp>(
binder.getLoc(), inputNgram_i, poolNgram_i);
isEqual = rewriter.create<Torch::AtenIntBoolOp>(
binder.getLoc(), isEqual);
foundNgram = rewriter.create<Torch::AtenMulIntOp>(
binder.getLoc(), isEqual, foundNgram);
}
count = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), count, foundNgram);
rewriter.create<Torch::PrimLoopConditionOp>(
binder.getLoc(), loopConditionTrue, ValueRange({count}));
}
count = countLoop.getResult(0);
rewriter.create<Torch::PrimLoopConditionOp>(
binder.getLoc(), loopConditionTrue, ValueRange({count}));
}
count = skipLoop.getResult(0);
// insert count "tf" into output
Value countFloat = rewriter.create<Torch::AtenFloatScalarOp>(
binder.getLoc(), count);
Value dataList = rewriter.create<Torch::PrimListConstructOp>(
binder.getLoc(),
rewriter.getType<Torch::ListType>(
rewriter.getType<Torch::FloatType>()),
SmallVector<Value>{countFloat});
Value cstDtype = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(), rewriter.getI64IntegerAttr(
(int)torch_upstream::ScalarType::Float));
SmallVector<int64_t> countShape{1};
auto countType = rewriter.getType<Torch::ValueTensorType>(
countShape, resultType.getOptionalDtype());
Value countTensor = rewriter.create<Torch::AtenTensorOp>(
binder.getLoc(), countType, dataList, /*dtype=*/cstDtype,
/*layout=*/none, /*requires_grad=*/cstFalse);
Value insertStart = rewriter.create<Torch::ConstantIntOp>(
binder.getLoc(),
rewriter.getI64IntegerAttr(ngram_indexes[ngram_i]));
Value insertEnd = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), insertStart, one);
outputForBatch = rewriter.create<Torch::AtenSliceScatterOp>(
binder.getLoc(), outputForBatch.getType(), outputForBatch,
countTensor,
/*dim=*/zero, insertStart, insertEnd, /*step=*/one);
} // start
}
if (is_2d) {
Value batchPlusOne = rewriter.create<Torch::AtenAddIntOp>(
binder.getLoc(), batchValue, one);
outputForBatch = rewriter.create<Torch::AtenUnsqueezeOp>(
binder.getLoc(),
rewriter.getType<Torch::ValueTensorType>(
llvm::SmallVector<int64_t>{1, resultShape[1]},
resultType.getDtype()),
outputForBatch, zero);
output = rewriter.create<Torch::AtenSliceScatterOp>(
binder.getLoc(), resultType, output, outputForBatch,
/*dim=*/zero, batchValue, batchPlusOne, /*step=*/one);
} else {
output = outputForBatch;
}
rewriter.create<Torch::PrimLoopConditionOp>(
binder.getLoc(), loopConditionTrue, ValueRange({output}));
}
output = batchLoop.getResult(0);
rewriter.replaceOp(binder.op, output);
return success();
});
patterns.onOp(
"Scan", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
Location loc = binder.getLoc();
SmallVector<Value> operands;
int64_t numScanInputs;
if (binder.tensorOperandsList(operands) || operands.size() == 0 ||
binder.s64IntegerAttr(numScanInputs, "num_scan_inputs")) {
return rewriter.notifyMatchFailure(binder.op,
"Failed to get required inputs");
}
SmallVector<Type> resultTypes;
if (binder.tensorResultTypes(resultTypes)) {
return rewriter.notifyMatchFailure(binder.op,
"result type bind failure");
}
Region *loopBodyIn;
if (binder.getRegionAtIndex(loopBodyIn, 0)) {
return rewriter.notifyMatchFailure(binder.op,
"Failed getting LoopBody Region");
}
int64_t numInits = operands.size() - numScanInputs;
SmallVector<Value> initVals(operands.begin(),
operands.begin() + numInits);
SmallVector<Value> scanInputs(operands.begin() + numInits,
operands.end());
if (scanInputs.size() < 1) {
return rewriter.notifyMatchFailure(binder.op,
"Expects at least one scan input");
}
Value constZero = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(0));
Value constOne = rewriter.create<Torch::ConstantIntOp>(
loc, rewriter.getI64IntegerAttr(1));
SmallVector<Type> scanOutTypes;
for (unsigned i = numInits; i < resultTypes.size(); i++) {
auto scanOutTy = cast<Torch::ValueTensorType>(resultTypes[i]);
// TODO: Handle dynamic result types.
if (!scanOutTy.hasSizes() || !scanOutTy.areAllSizesKnown()) {
return rewriter.notifyMatchFailure(
binder.op, "Expects result type to be static");
}
Value sizeList =
createConstantIntList(binder, rewriter, scanOutTy.getSizes());
initVals.push_back(Torch::createInitTensor(rewriter, loc, scanOutTy,
constZero, sizeList));
scanOutTypes.push_back(resultTypes[i]);
}
// Create torch.prim.Loop op.
Value maxTripCount = rewriter.create<Torch::AtenSizeIntOp>(
loc, scanInputs[0], constZero);
auto constBoolTrue = rewriter.create<Torch::ConstantBoolOp>(
binder.getLoc(), rewriter.getBoolAttr(true));
auto primLoop = rewriter.create<Torch::PrimLoopOp>(
loc, resultTypes, maxTripCount, constBoolTrue, initVals);
rewriter.cloneRegionBefore(*loopBodyIn, primLoop.getRegion(),
primLoop.getRegion().begin());
// Insert index var as torch.int argument in the loop body, as
// the primLoopOp loopBody expects torch.int as first argument.
primLoop.getRegion().insertArgument(
0u, rewriter.getType<Torch::IntType>(), loc);
auto loopInd = primLoop.getRegion().getArgument(0);
        // The block arguments of onnx.scan need to be replaced with
        // slices of the scan inputs.
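        // E.g. a scan input of shape [T, N] is sliced along dim 0 at the loop
        // index, so each iteration of the body sees a tensor of shape [N].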
rewriter.setInsertionPointToStart(&primLoop.getRegion().front());
for (unsigned i = 0; i < numScanInputs; i++) {
auto loopBlockArg =
primLoop.getRegion().getArgument(numInits + 1 + i);
Value extract = rewriter.create<Torch::AtenSelectIntOp>(
loc, loopBlockArg.getType(), scanInputs[i], constZero, loopInd);
loopBlockArg.replaceAllUsesWith(extract);
}
primLoop.getRegion().front().eraseArguments(numInits + 1,
/*count=*/numScanInputs);
// Collect the output slices to form scan outputs and replace the
// terminator.
SmallVector<Location> locs(scanOutTypes.size(), loc);
primLoop.getRegion().front().addArguments(scanOutTypes, locs);
PatternRewriter::InsertionGuard guard(rewriter);
Operation *terminator = primLoop.getRegion().front().getTerminator();
auto terminatorOperands = terminator->getOperands();
SmallVector<Value> resTerminatorOperands(
terminatorOperands.begin(), terminatorOperands.begin() + numInits);
SmallVector<Value> scanOutSlices(terminatorOperands.begin() + numInits,
terminatorOperands.end());
rewriter.setInsertionPoint(terminator);
for (unsigned i = 0; i < scanOutSlices.size(); i++) {
Value self = BlockArgument::Value(
primLoop.getRegion().getArgument(numInits + 1 + i));
FailureOr<Value> src = Torch::unsqueezeTensor(
rewriter, binder.op, scanOutSlices[i], constZero);
if (failed(src))
return failure();
Value scanOut = rewriter.create<Torch::AtenSliceScatterOp>(
loc, scanOutTypes[i], self, src.value(), constZero,
/*start=*/loopInd,
/*end=*/loopInd, constOne);
resTerminatorOperands.push_back(scanOut);
}
Value terminatorCond = constBoolTrue;
rewriter.replaceOpWithNewOp<Torch::PrimLoopConditionOp>(
terminator, terminatorCond, resTerminatorOperands);
rewriter.replaceOp(binder.op, primLoop);
return success();
});
}