//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//

#include "torch-mlir/Conversion/TorchToStablehlo/TorchToStablehlo.h"

#include "../PassDetail.h"
#include "PopulatePatterns.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "stablehlo/dialect/ChloOps.h"
#include "stablehlo/dialect/StablehloOps.h"
#include "torch-mlir/Conversion/TorchToStablehlo/StablehloLegalizeUtils.h"
#include "torch-mlir/Conversion/Utils/Utils.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
#include "torch-mlir/Dialect/Torch/IR/TorchTypes.h"
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"

using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::Torch;
using namespace mlir::torch::torch_to_stablehlo;

namespace {
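// Materializes the initial (identity) value used to seed the reduction
// regions built by the lowerings below. For AtenEmbeddingBagPaddingIdxOp this
// is the additive zero of the element type; unsupported ops or element types
// emit an error and yield a null Value.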
static Value createInitialValueForGatherScatterOp(Operation *op,
                                                  RankedTensorType constType,
                                                  PatternRewriter &rewriter) {
  if (!constType.hasStaticShape()) {
    return nullptr;
  }
  auto elementTy = constType.getElementType();
  if (isa<AtenEmbeddingBagPaddingIdxOp>(op)) {
    if (isa<mlir::FloatType>(elementTy)) {
      auto constAttr = DenseElementsAttr::get(
          constType, {APFloat::getZero(
                         cast<mlir::FloatType>(elementTy).getFloatSemantics(),
                         /*negative=*/false)});
      return rewriter.create<stablehlo::ConstantOp>(op->getLoc(), constType,
                                                    constAttr);
    } else if (isa<mlir::IntegerType>(elementTy) &&
               elementTy.getIntOrFloatBitWidth() != 8) {
      auto constAttr = DenseElementsAttr::get(
          constType, {APInt::getZero(elementTy.getIntOrFloatBitWidth())});
      return rewriter.create<stablehlo::ConstantOp>(op->getLoc(), constType,
                                                    constAttr);
    }
  }

  op->emitError("unimplemented lowering in "
                "createInitialValueForGatherScatterOp");
  return nullptr;
}
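
// Gathers slices of `input` along `axis` using `indices`. For example
// (shapes only): input : [4, 5, 6], indices : [2, 3], axis = 1
//   => output : [4, 2, 3, 6]
// i.e. input.shape[:axis] + indices.shape + input.shape[axis + 1:].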
Value gatherTensorAlongSingleAxis(PatternRewriter &rewriter, Operation *op,
                                  Value input, Value indices, int64_t axis,
                                  size_t dimSizeIndexBits) {
  auto loc = op->getLoc();
  Type intType = rewriter.getIntegerType(dimSizeIndexBits);
  Value one = rewriter.create<arith::ConstantOp>(
      loc, rewriter.getIntegerAttr(intType, 1));

  // sliceSizes
  auto inputRankTy = dyn_cast<RankedTensorType>(input.getType());
  auto inputRank = inputRankTy.getRank();
  SmallVector<Value, 4> sliceSizes;
  sliceSizes.reserve(inputRank);
  for (int64_t r = 0; r < inputRank; ++r) {
    if (r == axis) {
      sliceSizes.push_back(one);
    } else {
      sliceSizes.push_back(rewriter.create<arith::IndexCastOp>(
          loc, intType, rewriter.create<tensor::DimOp>(loc, input, r)));
    }
  }
  auto sliceSizesTensor =
      rewriter.create<tensor::FromElementsOp>(loc, sliceSizes);

  // offsetDims
  SmallVector<int64_t, 4> offsetDims;
  offsetDims.reserve(inputRank);
  for (int64_t r = 0; r < axis; ++r) {
    offsetDims.push_back(r);
  }
  auto indicesRankTy = dyn_cast<RankedTensorType>(indices.getType());
  auto indicesRank = indicesRankTy.getRank();
  for (int64_t r = axis + 1; r < inputRank; ++r) {
    offsetDims.push_back(r + indicesRank - 1);
  }

  // collapsedSliceDims
  SmallVector<int64_t, 4> collapsedSliceDims(1, axis);
  // startIndexMap
  SmallVector<int64_t, 4> startIndexMap(1, axis);
  // indexVecDim
  int64_t indexVecDim = indicesRank;
  auto dimsAttr = stablehlo::GatherDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*offsetDims=*/offsetDims,
      /*collapsedSliceDims=*/collapsedSliceDims,
      /*operandBatchingDims=*/{},
      /*startIndicesBatchingDims=*/{},
      /*startIndexMap=*/startIndexMap,
      /*indexVecDim=*/indexVecDim);

  // outputShape = input.shape[:axis] + indices.shape +
  //               input.shape[axis + 1:]
  auto inputShape = inputRankTy.getShape();
  auto indicesShape = indicesRankTy.getShape();
  SmallVector<int64_t, 4> outputShape(inputShape.begin(),
                                      inputShape.begin() + axis);
  outputShape.insert(outputShape.end(), indicesShape.begin(),
                     indicesShape.end());
  outputShape.insert(outputShape.end(), inputShape.begin() + axis + 1,
                     inputShape.end());

  // create output tensor type
  auto outputTy =
      RankedTensorType::get(outputShape, inputRankTy.getElementType());
  return rewriter
      .create<stablehlo::DynamicGatherOp>(loc, outputTy, input, indices,
                                          sliceSizesTensor, dimsAttr)
      .getResult();
}

template <typename OpTy, typename OpAdaptor>
LogicalResult prepareArgumentsForSlicingOp(OpTy op, OpAdaptor adaptor,
                                           ConversionPatternRewriter &rewriter,
                                           SmallVector<Value> &resultShape,
                                           SmallVector<Value> &offsets,
                                           SmallVector<Value> &strides) {
  Location loc = op.getLoc();
  auto input = adaptor.getSelf();
  RankedTensorType inputType = cast<RankedTensorType>(input.getType());

  Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
  Value one = rewriter.create<arith::ConstantIndexOp>(loc, 1);

  int64_t dim;
  if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim)))
    return op->emitError("unimplemented: dim is not constant");

  int64_t inputRank = inputType.getRank();
  dim = toPositiveDim(dim, inputRank);
  if (!isValidDim(dim, inputRank))
    return rewriter.notifyMatchFailure(op, "dim is statically invalid");

  SmallVector<Value> inputShape = getTensorSizes(rewriter, loc, input);
  Value dimSize = inputShape[dim];

  Value torchTypeStart = op.getStart();
  Value torchTypeEnd = op.getEnd();
  Value builtinTypeStart = adaptor.getStart();
  Value builtinTypeEnd = adaptor.getEnd();

  if (isa<OptionalType>(torchTypeStart.getType()) ||
      isa<OptionalType>(torchTypeEnd.getType()))
    return rewriter.notifyMatchFailure(op, "unimplemented optional type arg");

  int64_t step;
  if (!matchPattern(op.getStep(), m_TorchConstantInt(&step))) {
    if (!isa<Torch::NoneType>(op.getStep().getType()))
      return op->emitError("unimplemented: step is not constant");
    step = 1;
  }

  Value start = toPositiveValidDim(rewriter, loc, torchTypeStart,
                                   builtinTypeStart, zero, dimSize);
  Value end = toPositiveValidDim(rewriter, loc, torchTypeEnd, builtinTypeEnd,
                                 dimSize, dimSize);

  // end >= start ? end : start
  Value endSgeStart = rewriter.create<arith::CmpIOp>(
      loc, arith::CmpIPredicate::sge, end, start);
  end = rewriter.create<arith::SelectOp>(loc, endSgeStart, end, start);
  Value stepIndex = rewriter.create<arith::ConstantIndexOp>(loc, step);

  // Slice logic: resultSize = floordiv(end - start + step - 1, step)
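  // e.g. with start = 1, end = 8, step = 3 this gives
  // floordiv(8 - 1 + 3 - 1, 3) = 3, matching the selected indices 1, 4, 7.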
  resultShape = getTensorSizes(rewriter, loc, input);
  Value len = rewriter.create<arith::SubIOp>(loc, end, start);
  Value resultSize = rewriter.create<arith::AddIOp>(loc, len, stepIndex);
  resultSize = rewriter.create<arith::SubIOp>(loc, resultSize, one);
  resultSize = rewriter.create<arith::FloorDivSIOp>(loc, resultSize, stepIndex);
  resultShape[dim] = resultSize;

  strides.resize(inputType.getRank(), one);
  offsets.resize(inputType.getRank(), zero);

  offsets[dim] = start;
  strides[dim] = rewriter.create<arith::MulIOp>(loc, strides[dim], stepIndex);
  return success();
}
} // namespace

namespace {
// A helper function used to generate stablehlo's ScatterIndices or
// GatherIndices from torch's indices, which typically appear in torch ops
// such as aten.index.Tensor or aten.index_put. A usage example is as follows:
// Input: [[1, 2, 3],
//         [4, 5, 6],
//         [7, 8, 9]]
// Indices[0]: [[0, 0, 0],
//              [2, 2, 0]]
// Indices[1]: [[2],
//              [1]]
// Step 1: broadcast indices tensors
// Indices[0]: [[0, 0, 0],
//              [2, 2, 0]]
// Indices[1]: [[2, 2, 2],
//              [1, 1, 1]]
// Step 2: concat index tensors at an unsqueezed -1 dimension.
// Indices: [[[0, 2], [0, 2], [0, 2]],
//           [[2, 1], [2, 1], [0, 1]]]
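// The result therefore has shape broadcastShape + [len(Indices)], with each
// coordinate vector laid out along the trailing dimension.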
FailureOr<Value> broadcastAndConcatIndices(Operation *op,
                                           ConversionPatternRewriter &rewriter,
                                           SmallVector<Value> indexTensors,
                                           size_t dimSizeIndexBits,
                                           int &maxIndexRank) {
  // Step 1: broadcast indices tensors
  bool allIndexStaticShape = true;

  // Scan the index tensors to record the maximum rank and whether every
  // index shape is fully static.
  for (size_t i = 0; i < indexTensors.size(); i++) {
    auto indexTensor = indexTensors[i];
    auto indexTensorType = cast<RankedTensorType>(indexTensor.getType());
    for (int64_t size : makeShapeTorchCompatible(indexTensorType.getShape())) {
      if (size == kUnknownSize)
        allIndexStaticShape = false;
    }
    maxIndexRank = std::max(maxIndexRank, (int)indexTensorType.getRank());
  }

  auto bcastSizeInfo = hlo::getBroadcastResultShape(rewriter, op, indexTensors,
                                                    dimSizeIndexBits);
  if (failed(bcastSizeInfo)) {
    return failure();
  }
  Value bcastSizeTensor = (*bcastSizeInfo).first;
  auto indicesShape = (*bcastSizeInfo).second;
  SmallVector<int64_t> expandShape(indicesShape.begin(), indicesShape.end());
  SmallVector<int64_t> concatShape(indicesShape.begin(), indicesShape.end());
  expandShape.push_back(1);
  concatShape.push_back(indexTensors.size());

  SmallVector<Value> broadcastedIndices;
  Type indexElemTy = rewriter.getI64Type();
  RankedTensorType bcastIndexType =
      RankedTensorType::get(indicesShape, indexElemTy);
  for (auto indexTensor : indexTensors) {
    Value bcastVal;
    RankedTensorType reshapeType =
        RankedTensorType::get(expandShape, indexElemTy);
    if (allIndexStaticShape) {
      bcastVal = hlo::promoteAndBroadcast(rewriter, indexTensor, bcastIndexType,
                                          std::nullopt);
      bcastVal = rewriter.create<stablehlo::ReshapeOp>(op->getLoc(),
                                                       reshapeType, bcastVal);
    } else {
      bcastVal = hlo::promoteAndBroadcast(rewriter, indexTensor, bcastIndexType,
                                          bcastSizeTensor);
      auto bcastValShapeTensorVec =
          *hlo::getDimSizesOfTensor(rewriter, op, bcastVal, dimSizeIndexBits);
      bcastValShapeTensorVec.push_back(rewriter.create<mlir::arith::ConstantOp>(
          op->getLoc(), rewriter.getIntegerAttr(
                            rewriter.getIntegerType(dimSizeIndexBits), 1)));
      Value bcastValShapeTensor = rewriter
                                      .create<tensor::FromElementsOp>(
                                          op->getLoc(), bcastValShapeTensorVec)
                                      .getResult();
      bcastVal = rewriter.create<stablehlo::DynamicReshapeOp>(
          op->getLoc(), reshapeType, bcastVal, bcastValShapeTensor);
    }
    broadcastedIndices.push_back(bcastVal);
  }

  // Step 2: concat index tensors at an unsqueezed -1 dimension.
  Value finalIndexTensor = broadcastedIndices[0];
  if (broadcastedIndices.size() > 1) {
    RankedTensorType concatTy = RankedTensorType::get(concatShape, indexElemTy);
    finalIndexTensor = rewriter.create<stablehlo::ConcatenateOp>(
        op->getLoc(), concatTy, ValueRange(broadcastedIndices),
        concatShape.size() - 1);
  }
  return finalIndexTensor;
}
} // namespace

// Ref:
// https://pytorch.org/docs/stable/generated/torch.nn.functional.embedding.html
// padding_idx (int, optional)
//     – If specified, the entries at padding_idx do not contribute to the
//     gradient; therefore, the embedding vector at padding_idx is not updated
//     during training, i.e. it remains as a fixed “pad”.
// scale_grad_by_freq (boolean, optional)
//     – If given, this will scale gradients by the inverse of frequency of the
//     words in the mini-batch. Default False.
// sparse (bool, optional)
//     – If True, gradient w.r.t. weight matrix will be a sparse tensor.
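//
// The lookup itself is lowered as a gather along axis 0 of the weight, e.g.
// (shapes only) weight : [numEmbeddings, embedDim], indices : [B, S]
//   => output : [B, S, embedDim].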
template <>
LogicalResult ConvertAtenOp<AtenEmbeddingOp>::matchAndRewrite(
    AtenEmbeddingOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  auto weight = adaptor.getWeight();
  auto weightTy = cast<RankedTensorType>(weight.getType());
  if (!weightTy)
    return op.emitError("only ranked tensor types are supported");

  int64_t padding_idx;
  if (!matchPattern(op.getPaddingIdx(), m_TorchConstantInt(&padding_idx)))
    return rewriter.notifyMatchFailure(
        op, "only constant padding_idx is currently supported");

  bool scale_grad_by_freq;
  if (!matchPattern(op.getScaleGradByFreq(),
                    m_TorchConstantBool(&scale_grad_by_freq)))
    return rewriter.notifyMatchFailure(
        op, "only constant scale_grad_by_freq is currently supported");
  if (scale_grad_by_freq)
    return rewriter.notifyMatchFailure(
        op, "scale gradients is currently not supported");
  bool sparse;
  if (!matchPattern(op.getSparse(), m_TorchConstantBool(&sparse)))
    return rewriter.notifyMatchFailure(
        op, "only constant sparse is currently supported");
  if (sparse)
    return rewriter.notifyMatchFailure(
        op, "sparse gradients is currently not supported");

  Value output = gatherTensorAlongSingleAxis(
      rewriter, op, weight, adaptor.getIndices(), 0, options.dimSizeIndexBits);
  rewriter.replaceOpWithNewOp<stablehlo::ConvertOp>(
      op, getTypeConverter()->convertType(op.getType()), output);

  return success();
}

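// Lowers aten._embedding_bag.padding_idx, restricted here to a single bag in
// sum mode. The extra results (presumably offset2bag, bag_size, and
// max_indices in the PyTorch signature) are not computed: they are returned
// as zero-initialized placeholders.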
template <>
LogicalResult ConvertAtenOp<AtenEmbeddingBagPaddingIdxOp>::matchAndRewrite(
    AtenEmbeddingBagPaddingIdxOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op->getLoc();
  Value weight = adaptor.getWeight();
  Value indices = adaptor.getIndices();
  Value offsets = adaptor.getOffsets();

  auto weightTy = cast<RankedTensorType>(weight.getType());
  if (weightTy && weightTy.hasStaticShape() && weightTy.getRank() != 2)
    return rewriter.notifyMatchFailure(
        op, "weight must be rank 2 tensor with static shapes");

  auto indicesTy = cast<RankedTensorType>(indices.getType());
  if (indicesTy && indicesTy.hasStaticShape() && indicesTy.getRank() != 1)
    return rewriter.notifyMatchFailure(
        op, "indices must be a vector with static shapes");

  auto offsetsTy = cast<RankedTensorType>(offsets.getType());
  if (offsetsTy && offsetsTy.getRank() != 1 && offsetsTy.hasStaticShape() &&
      offsetsTy.getShape()[0] == 1)
    return rewriter.notifyMatchFailure(
        op, "offsets must be a vector with static shape equal to 1");

  if (!isa<Torch::NoneType>(op.getPaddingIdx().getType()))
    return rewriter.notifyMatchFailure(
        op, "Unimplemented: padding_idx should be none");

  if (!isa<Torch::NoneType>(op.getPerSampleWeights().getType()))
    return rewriter.notifyMatchFailure(
        op, "Unimplemented: per_sample_weights should be none");

  bool includeLastOffset;
  if (!matchPattern(op.getIncludeLastOffset(),
                    m_TorchConstantBool(&includeLastOffset))) {
    return rewriter.notifyMatchFailure(
        op, "include_last_offset is expected to be a constant boolean value.");
  }
  if (includeLastOffset)
    return rewriter.notifyMatchFailure(
        op, "include_last_offset is currently not supported");

  bool scaleGradByFreq;
  if (!matchPattern(op.getScaleGradByFreq(),
                    m_TorchConstantBool(&scaleGradByFreq)))
    return rewriter.notifyMatchFailure(
        op, "only constant scale_grad_by_freq is currently supported");
  if (scaleGradByFreq)
    return rewriter.notifyMatchFailure(
        op, "scale gradients is currently not supported");

  bool sparse;
  if (!matchPattern(op.getSparse(), m_TorchConstantBool(&sparse)))
    return rewriter.notifyMatchFailure(
        op, "only constant sparse is currently supported");
  if (sparse)
    return rewriter.notifyMatchFailure(
        op, "sparse gradients is currently not supported");

  int64_t modeInt;
  if (!matchPattern(op.getMode(), m_TorchConstantInt(&modeInt))) {
    return rewriter.notifyMatchFailure(
        op, "mode is expected to be a constant integer value.");
  }
  if (modeInt != torch_upstream::EmbeddingBagMode::MODE_SUM) {
    return rewriter.notifyMatchFailure(op,
                                       "Unimplemented: Mean and Max mode are "
                                       "not supported yet for EmbeddingBag.");
  }

  const auto &options =
      ConvertAtenOp<AtenEmbeddingBagPaddingIdxOp>::getOptions();
  auto weightDimSizes =
      *hlo::getDimSizesOfTensor(rewriter, op, weight, options.dimSizeIndexBits);
  auto indicesDimSizes = *hlo::getDimSizesOfTensor(rewriter, op, indices,
                                                   options.dimSizeIndexBits);
  auto offsetsDimSizes = *hlo::getDimSizesOfTensor(rewriter, op, offsets,
                                                   options.dimSizeIndexBits);

  Value gatherOutput = gatherTensorAlongSingleAxis(
      rewriter, op, weight, indices, 0, options.dimSizeIndexBits);

  Type elementTy = weightTy.getElementType();
  auto constType = RankedTensorType::get({}, elementTy);
  Value initValue =
      createInitialValueForGatherScatterOp(op, constType, rewriter);
  if (!initValue)
    return failure();

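  // Sum-reduce the gathered embedding rows over dimension 0. Because offsets
  // has been restricted to a single bag above, this one reduction computes
  // the entire sum-mode embedding_bag output.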
  auto stablehloReduceOp = rewriter.create<stablehlo::ReduceOp>(
      op.getLoc(), gatherOutput, initValue, rewriter.getDenseI64ArrayAttr({0}),
      elementTy);

  Region &region = stablehloReduceOp.getBody();
  Block &block = region.emplaceBlock();
  auto blockArgumentTy = RankedTensorType::get({}, elementTy);

  block.addArgument(blockArgumentTy, op->getLoc());
  block.addArgument(blockArgumentTy, op->getLoc());

  auto *firstArgument = block.args_begin();
  auto secondArgument = block.args_rbegin();

  {
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointToStart(&block);
    Value addResult = rewriter.create<stablehlo::AddOp>(
        op->getLoc(), blockArgumentTy, *firstArgument, *secondArgument);
    rewriter.create<stablehlo::ReturnOp>(op->getLoc(), addResult);
  }

  auto outShapeInfo = hlo::getDimIndexOfTensor(rewriter, op, weight);
  if (failed(outShapeInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to get dimension sizes of the input");
  }
  auto outShapeVec = *outShapeInfo;
  auto one = rewriter.create<mlir::arith::ConstantOp>(
      op->getLoc(), rewriter.getIntegerAttr(rewriter.getIndexType(), 1));
  outShapeVec[0] = one;
  auto outShapeTensor =
      rewriter.create<mlir::tensor::FromElementsOp>(op->getLoc(), outShapeVec);
  auto resultA = rewriter.create<stablehlo::DynamicReshapeOp>(
      loc, getTypeConverter()->convertType(op.getType(0)),
      stablehloReduceOp.getResult(0), outShapeTensor);

  RankedTensorType resultType = cast<RankedTensorType>(
      getTypeConverter()->convertType(op->getResult(1).getType()));
  Value resultB =
      createInitialValueForGatherScatterOp(op, resultType, rewriter);
  if (!resultB)
    return failure();

  resultType = cast<RankedTensorType>(
      getTypeConverter()->convertType(op->getResult(2).getType()));
  Value resultC =
      createInitialValueForGatherScatterOp(op, resultType, rewriter);
  if (!resultC)
    return failure();

  resultType = cast<RankedTensorType>(
      getTypeConverter()->convertType(op->getResult(3).getType()));
  Value resultD =
      createInitialValueForGatherScatterOp(op, resultType, rewriter);
  if (!resultD)
    return failure();

  rewriter.replaceOp(op, {resultA, resultB, resultC, resultD});
  return success();
}

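// AtenIndexSelectOp lowers to the same single-axis gather as embedding,
// except along an arbitrary `dim`, e.g. (shapes only)
// self : [4, 5, 6], dim = 1, index : [3] => output : [4, 3, 6].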
template <>
LogicalResult ConvertAtenOp<AtenIndexSelectOp>::matchAndRewrite(
    AtenIndexSelectOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  auto self = adaptor.getSelf();
  auto selfTy = cast<RankedTensorType>(self.getType());
  if (!selfTy)
    return op.emitError("only ranked tensor types are supported");
  int64_t dim;
  if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim)))
    return rewriter.notifyMatchFailure(
        op, "only constant dim is currently supported");
  int64_t inputRank = selfTy.getRank();
  dim = toPositiveDim(dim, inputRank);
  if (!isValidDim(dim, inputRank))
    return rewriter.notifyMatchFailure(op, "dim is statically invalid");

  Value output = gatherTensorAlongSingleAxis(
      rewriter, op, self, adaptor.getIndex(), dim, options.dimSizeIndexBits);

  rewriter.replaceOpWithNewOp<stablehlo::ConvertOp>(
      op, getTypeConverter()->convertType(op.getType()), output);

  return success();
}

// AtenGatherOp
template <>
LogicalResult ConvertAtenOp<AtenGatherOp>::matchAndRewrite(
    AtenGatherOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op->getLoc();
  Value input = adaptor.getSelf();
  Value index = adaptor.getIndex();
  auto inputType = cast<RankedTensorType>(input.getType());
  auto indexType = cast<RankedTensorType>(index.getType());
  auto indexElemType = indexType.getElementType();

  if (indexType.getRank() != inputType.getRank()) {
    return op.emitError("`index` and `input` param should have the same rank");
  }
  int64_t dim;
  if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim))) {
    return rewriter.notifyMatchFailure(
        op, "only constant int `dim` param supported");
  }
  dim = toPositiveDim(dim, inputType.getRank());
  if (!isValidDim(dim, inputType.getRank())) {
    return rewriter.notifyMatchFailure(op, "invalid `dim` param detected");
  }

  bool sparseGrad = false;
  if (!matchPattern(op.getSparseGrad(), m_TorchConstantBool(&sparseGrad))) {
    return rewriter.notifyMatchFailure(
        op, "only constant boolean `sparse_grad` param supported");
  }

  auto indexShapeInfo = hlo::getDimIndexOfTensor(rewriter, op, index);
  if (failed(indexShapeInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to get dim sizes of `index` param");
  }
  auto one = rewriter.create<arith::ConstantOp>(
      loc, rewriter.getIntegerAttr(rewriter.getIndexType(), 1));
  auto toConcatIndexShapeValueVec = *indexShapeInfo;
  toConcatIndexShapeValueVec.push_back(one);
  auto toConcatIndexShape =
      rewriter.create<tensor::FromElementsOp>(loc, toConcatIndexShapeValueVec);

  auto indexShape = indexType.getShape();
  SmallVector<int64_t> toConcatIndexShapeVec(indexShape.begin(),
                                             indexShape.end());
  toConcatIndexShapeVec.push_back(1);
  RankedTensorType toConcatIndexType =
      RankedTensorType::get(toConcatIndexShapeVec, indexElemType);

  SmallVector<Value> toConcat;
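  // Build full gather coordinates: for every position p in `index`, the
  // start-index vector is p with its `dim`-th component replaced by index[p].
  // Iota supplies the unchanged coordinates, e.g. for a 2-D input with
  // dim = 1 the coordinate for position (i, j) is [i, index[i][j]].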
  for (int64_t i = 0; i < inputType.getRank(); ++i) {
    if (i == dim) {
      toConcat.push_back(rewriter.create<stablehlo::DynamicReshapeOp>(
          loc, toConcatIndexType, index, toConcatIndexShape));
    } else {
      toConcat.push_back(rewriter.create<stablehlo::DynamicIotaOp>(
          loc, toConcatIndexType, toConcatIndexShape,
          rewriter.getI64IntegerAttr(i)));
    }
  }
  auto gatherIndices = rewriter.create<stablehlo::ConcatenateOp>(
      loc, toConcat, static_cast<uint64_t>(inputType.getRank()));
  SmallVector<int64_t> sliceSizes(inputType.getRank(), 1);

  int64_t indexVecDim = inputType.getRank();
  SmallVector<int64_t> collapsedDims;
  SmallVector<int64_t> startIndexMap;
  for (int64_t i = 0; i < inputType.getRank(); ++i) {
    collapsedDims.push_back(i);
    startIndexMap.push_back(i);
  }

  auto dimsAttr = stablehlo::GatherDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*offsetDims=*/{},
      /*collapsedSliceDims=*/collapsedDims,
      /*operandBatchingDims=*/{},
      /*startIndicesBatchingDims=*/{},
      /*startIndexMap=*/startIndexMap,
      /*indexVecDim=*/indexVecDim);

  rewriter.replaceOpWithNewOp<stablehlo::GatherOp>(
      op, input, gatherIndices, dimsAttr,
      rewriter.getDenseI64ArrayAttr(sliceSizes));
  return success();
}

// AtenSliceScatterOp
template <>
LogicalResult ConvertAtenOp<AtenSliceScatterOp>::matchAndRewrite(
    AtenSliceScatterOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {

  if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
    return failure();

  Location loc = op.getLoc();
  const TypeConverter *typeConverter = getTypeConverter();

  auto input = adaptor.getSelf();
  RankedTensorType inputType = cast<RankedTensorType>(input.getType());

  RankedTensorType resultType = cast<RankedTensorType>(
      typeConverter->convertType(op->getResult(0).getType()));

  int64_t dim;
  if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim))) {
    return op->emitError("unimplemented: dim is not constant");
  }

  int64_t inputRank = inputType.getRank();
  dim = toPositiveDim(dim, inputRank);
  if (!isValidDim(dim, inputRank)) {
    return rewriter.notifyMatchFailure(op, "dim is statically invalid");
  }

  auto inputShape = inputType.getShape();
  auto dimSize = inputShape[dim];
  int64_t step;
  if (!matchPattern(op.getStep(), m_TorchConstantInt(&step))) {
    return op->emitError("unimplemented: step is not constant");
  }

  int64_t start;
  if (!matchPattern(op.getStart(), m_TorchConstantInt(&start))) {
    return op->emitError("unimplemented: start is not constant");
  } else if (ShapedType::isDynamic(dimSize) && start < 0) {
    return op->emitError("unimplemented: not support dynamic dimSize when "
                         "start smaller than 0.");
  }
  start = start >= 0 ? start : dimSize + start;

  int64_t end;
  if (!matchPattern(op.getEnd(), m_TorchConstantInt(&end))) {
    return op->emitError("unimplemented: end is not constant");
  } else if (ShapedType::isDynamic(dimSize) && end < 0) {
    return op->emitError(
        "unimplemented: not support dynamic dimSize when end smaller than 0.");
  }
  end = end >= 0 ? end : dimSize + end;

  int64_t size = 0;
  std::vector<int64_t> indicesVec;
  for (int64_t i = start; i < end; i += step) {
    indicesVec.push_back(i);
    ++size;
  }
  ArrayRef<int64_t> indices(indicesVec);
  std::vector<int64_t> tmp_shape = {size, 1};
  ArrayRef<int64_t> shape(tmp_shape);
  RankedTensorType constType =
      RankedTensorType::get(shape, rewriter.getIntegerType(64));
  auto constAttr = DenseElementsAttr::get(
      RankedTensorType::get(shape, rewriter.getIntegerType(64)), indices);
  auto const_op =
      rewriter.create<stablehlo::ConstantOp>(loc, constType, constAttr);
  Value scatterIndices = const_op.getResult();
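  // The scatter indices enumerate the sliced positions as a column vector,
  // e.g. start = 1, end = 7, step = 2 => [[1], [3], [5]] : tensor<3x1xi64>.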

  SmallVector<int64_t> updateWindowDims;
  for (int64_t i = 0; i < inputType.getRank(); ++i) {
    if (i == dim) {
      continue;
    }
    updateWindowDims.push_back(i);
  }

  auto scatterArgs = stablehlo::ScatterDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*updateWindowDims=*/updateWindowDims,
      /*insertedWindowDims=*/{dim},
      /*inputBatchingDims=*/{},
      /*scatterIndicesBatchingDims=*/{},
      /*scatterDimsToOperandDim=*/{dim},
      /*indexVectorDim=*/1);

  Value src = adaptor.getSrc();
  auto scatterOp = rewriter.create<stablehlo::ScatterOp>(
      loc, resultType, input, scatterIndices, src, scatterArgs, false, false);

  Block &block = scatterOp.getUpdateComputation().emplaceBlock();
  auto blockArgumentType =
      RankedTensorType::get({}, inputType.getElementType());
  block.addArgument(blockArgumentType, loc);
  block.addArgument(blockArgumentType, loc);

  auto *lhs = block.args_begin();
  auto *rhs = std::next(lhs);
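  // The update region simply yields the update operand, so scattered
  // elements overwrite the corresponding slice of the input.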
  {
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointToStart(&block);
    rewriter.create<stablehlo::ReturnOp>(loc, *rhs);
  }

  rewriter.replaceOp(op, scatterOp.getResults());

  return success();
}

2024-04-01 19:39:49 +08:00
|
|
|
|
template <typename AtenOpT, int reduceType>
|
|
|
|
|
class ConvertAtenScatterOp : public ConvertAtenOp<AtenOpT> {
|
|
|
|
|
public:
|
|
|
|
|
using ConvertAtenOp<AtenOpT>::ConvertAtenOp;
|
|
|
|
|
using OpAdaptor = typename AtenOpT::Adaptor;
|
|
|
|
|
LogicalResult
|
|
|
|
|
matchAndRewrite(AtenOpT op, OpAdaptor adaptor,
|
|
|
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
|
|
|
Location loc = op->getLoc();
|
|
|
|
|
Value input = adaptor.getSelf();
|
|
|
|
|
Value index = adaptor.getIndex();
|
|
|
|
|
Value src = adaptor.getSrc();
|
2024-04-28 05:00:56 +08:00
|
|
|
|
auto inputType = cast<RankedTensorType>(input.getType());
|
|
|
|
|
auto indexType = cast<RankedTensorType>(index.getType());
|
|
|
|
|
auto srcType = cast<RankedTensorType>(src.getType());
|
2024-04-01 19:39:49 +08:00
|
|
|
|
auto indexElemType = indexType.getElementType();
|
|
|
|
|
|
|
|
|
|
if (indexType.getRank() != inputType.getRank() ||
|
|
|
|
|
inputType.getRank() != srcType.getRank()) {
|
|
|
|
|
return op.emitError(
|
|
|
|
|
"`index`, `input` and `src` param should have the same rank");
|
|
|
|
|
}
|
|
|
|
|
int64_t dim;
|
|
|
|
|
if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim))) {
|
|
|
|
|
return rewriter.notifyMatchFailure(
|
|
|
|
|
op, "only constant int `dim` param supported");
|
|
|
|
|
}
|
|
|
|
|
dim = toPositiveDim(dim, inputType.getRank());
|
|
|
|
|
if (!isValidDim(dim, inputType.getRank())) {
|
|
|
|
|
return rewriter.notifyMatchFailure(op, "invalid `dim` param detected");
|
2023-07-24 10:14:45 +08:00
|
|
|
|
}
|
|
|
|
|
|
[Stablehlo] use index type as dim size, avoid to generate index_cast (#3526)
For example, the original IR is:
```
module attributes {torch.debug_module_name = "Matmul3D"} {
func.func @forward(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%dim = tensor.dim %arg1, %c0 : tensor<?x?x?xf32>
%0 = arith.index_cast %dim : index to i64
%dim_0 = tensor.dim %arg1, %c1 : tensor<?x?x?xf32>
%1 = arith.index_cast %dim_0 : index to i64
%dim_1 = tensor.dim %arg1, %c2 : tensor<?x?x?xf32>
%2 = arith.index_cast %dim_1 : index to i64
%from_elements = tensor.from_elements %0, %1, %2 : tensor<3xi64>
%3 = stablehlo.dynamic_broadcast_in_dim %arg1, %from_elements, dims = [0, 1, 2] : (tensor<?x?x?xf32>, tensor<3xi64>) -> tensor<?x?x?xf32>
%4 = stablehlo.dot_general %arg0, %3, batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
return %4 : tensor<?x?x?xf32>
}
}
```
After using IndexType, the IR is:
```
module attributes {torch.debug_module_name = "Matmul3D"} {
func.func @forward(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%dim = tensor.dim %arg1, %c0 : tensor<?x?x?xf32>
%dim_0 = tensor.dim %arg1, %c1 : tensor<?x?x?xf32>
%dim_1 = tensor.dim %arg1, %c2 : tensor<?x?x?xf32>
%from_elements = tensor.from_elements %dim, %dim_0, %dim_1 : tensor<3xindex>
%0 = stablehlo.dynamic_broadcast_in_dim %arg1, %from_elements, dims = [0, 1, 2] : (tensor<?x?x?xf32>, tensor<3xindex>) -> tensor<?x?x?xf32>
%1 = stablehlo.dot_general %arg0, %0, batching_dims = [0] x [0], contracting_dims = [2] x [1] : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
return %1 : tensor<?x?x?xf32>
}
}
```
The benefits of using IndexType on shape tensor:
* simplify the IR, avoid to generate `arith.index_cast`
* let backend compiler have a chance to decide the index width of shape
tensor
  auto indexShapeInfo = hlo::getDimIndexOfTensor(rewriter, op, index);
  if (failed(indexShapeInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to get dim sizes of `index` param");
  }

  // Slice the src tensor so that its leading dimensions match the shape
  // bound of the index tensor. PyTorch guarantees that src is at least as
  // large as index in every dimension. REF:
  // https://pytorch.org/docs/stable/generated/torch.Tensor.scatter_.html#torch.Tensor.scatter_
  auto zero = rewriter.create<arith::ConstantOp>(
      loc, rewriter.getIntegerAttr(rewriter.getIndexType(), 0));
  auto one = rewriter.create<arith::ConstantOp>(
      loc, rewriter.getIntegerAttr(rewriter.getIndexType(), 1));
  SmallVector<Value> sliceIndices(srcType.getRank(), zero);
  SmallVector<Value> sliceStrides(srcType.getRank(), one);

  auto sliceIndicesValue =
      rewriter.create<tensor::FromElementsOp>(loc, sliceIndices);
  auto sliceStridesValue =
      rewriter.create<tensor::FromElementsOp>(loc, sliceStrides);
  auto sliceLimitIndicesValue =
      rewriter.create<tensor::FromElementsOp>(loc, *indexShapeInfo);

  auto newSrcType =
      RankedTensorType::get(indexType.getShape(), srcType.getElementType());
  src = rewriter.create<stablehlo::RealDynamicSliceOp>(
      loc, newSrcType, src, sliceIndicesValue, sliceLimitIndicesValue,
      sliceStridesValue);
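  // For example (illustrative): with src of shape [5, 4] and index of shape
  // [2, 3], the RealDynamicSliceOp above shrinks src to [2, 3], so src and
  // index positions correspond one-to-one from here on.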

  // Generate scatter indices for the stablehlo::ScatterOp.
  auto toConcatIndexShapeValueVec = *indexShapeInfo;
  toConcatIndexShapeValueVec.push_back(one);
  auto toConcatIndexShape = rewriter.create<tensor::FromElementsOp>(
      loc, toConcatIndexShapeValueVec);

  auto indexShape = indexType.getShape();
  SmallVector<int64_t> toConcatIndexShapeVec(indexShape.begin(),
                                             indexShape.end());
  toConcatIndexShapeVec.push_back(1);
  RankedTensorType toConcatIndexType =
      RankedTensorType::get(toConcatIndexShapeVec, indexElemType);

  SmallVector<Value> toConcat;
  for (int64_t i = 0; i < inputType.getRank(); ++i) {
    if (i == dim) {
      toConcat.push_back(rewriter.create<stablehlo::DynamicReshapeOp>(
          loc, toConcatIndexType, index, toConcatIndexShape));
    } else {
      toConcat.push_back(rewriter.create<stablehlo::DynamicIotaOp>(
          loc, toConcatIndexType, toConcatIndexShape,
          rewriter.getI64IntegerAttr(i)));
    }
  }
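  // Illustrative example: for a rank-2 input with dim == 0, the loop above
  // pairs the user-supplied index with an iota along dimension 1, so index
  // position (i, j) yields the full coordinate (index[i][j], j). That is
  // exactly torch.scatter's rule self[index[i][j]][j] = src[i][j].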

  auto scatterIndices = rewriter.create<stablehlo::ConcatenateOp>(
      loc, toConcat, static_cast<uint64_t>(inputType.getRank()));

  // Generate ScatterDimensionNumbers for the stablehlo::ScatterOp.
  int64_t indexVecDim = inputType.getRank();
  SmallVector<int64_t> scatterDimOperandDimMap;
  SmallVector<int64_t> insertedWindowDims;
  for (int64_t i = 0; i < inputType.getRank(); ++i) {
    scatterDimOperandDimMap.push_back(i);
    insertedWindowDims.push_back(i);
  }
  auto scatterDimensionNumbers = stablehlo::ScatterDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*updateWindowDims=*/{},
      /*insertedWindowDims=*/insertedWindowDims,
      /*inputBatchingDims=*/{},
      /*scatterIndicesBatchingDims=*/{},
      /*scatterDimsToOperandDim=*/scatterDimOperandDimMap,
      /*indexVectorDim=*/indexVecDim);
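  // Every input dimension appears in insertedWindowDims and updateWindowDims
  // is empty, so each update written by the scatter is a scalar element of
  // src; indexVectorDim points at the trailing coordinate dimension of the
  // scatterIndices tensor built above.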

  auto stablehloScatterOp = rewriter.create<stablehlo::ScatterOp>(
      loc, inputType, input, scatterIndices, src, scatterDimensionNumbers,
      false, false);

  // Configure the update computation region.
  Block &block = stablehloScatterOp.getUpdateComputation().emplaceBlock();
  // Add block arguments (the current element and the update element).
  auto blockArgumentType =
      RankedTensorType::get({}, inputType.getElementType());
  block.addArgument(blockArgumentType, loc);
  block.addArgument(blockArgumentType, loc);

  auto *lhsArg = block.args_begin();
  auto *rhsArg = std::next(lhsArg);

  {
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointToStart(&block);
    if (reduceType == 0) {
      // No reduction: just return the element from src.
      rewriter.create<stablehlo::ReturnOp>(loc, *rhsArg);
    } else if (reduceType == 1) {
      // Add reduction: accumulate the update into the existing element.
      Value res = rewriter.create<stablehlo::AddOp>(loc, blockArgumentType,
                                                    *lhsArg, *rhsArg);
      rewriter.create<stablehlo::ReturnOp>(loc, res);
    }
  }
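  // The reduceType values mirror the pattern registration at the bottom of
  // this file: 0 lowers aten.scatter.src (plain overwrite) and 1 lowers
  // aten.scatter.add (accumulating scatter).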

  rewriter.replaceOp(op, stablehloScatterOp.getResults());
  return success();
}
};

// AtenIndexTensorOp
// Convert to stablehlo::GatherOp.
template <>
LogicalResult ConvertAtenOp<AtenIndexTensorHackedTwinOp>::matchAndRewrite(
    AtenIndexTensorHackedTwinOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op->getLoc();
  Value input = adaptor.getSelf();
  auto inputTensorType = cast<RankedTensorType>(input.getType());
  auto outType =
      cast<RankedTensorType>(getTypeConverter()->convertType(op.getType()));
  Value indexList = op.getIndices();
  SmallVector<Value> indicesTorchType;
  if (!getListConstructElements(indexList, indicesTorchType))
    return op.emitError(
        "unimplemented: the tensor list is not from list construct");

  auto indexTensors = getTypeConvertedValues(rewriter, loc, getTypeConverter(),
                                             indicesTorchType);

  int maxIndexRank = -1;
  auto gatherIndicesInfo = broadcastAndConcatIndices(
      op, rewriter, indexTensors, options.dimSizeIndexBits, maxIndexRank);
  if (failed(gatherIndicesInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to generate broadcasted indices");
  }
  auto gatherIndices = *gatherIndicesInfo;
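  // As used here, broadcastAndConcatIndices broadcasts the index tensors to
  // a common shape of rank maxIndexRank and concatenates them along a new
  // trailing dimension, so gatherIndices has shape
  // [broadcastDims..., numIndexTensors].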

  int64_t numIndicesDim = indexTensors.size();
  int64_t indexVecDim = maxIndexRank;

  SmallVector<int64_t> offsetDims;
  SmallVector<int64_t> collapsedDims;
  SmallVector<int64_t> startIndexMap;
  for (int64_t i = 0; i < numIndicesDim; ++i) {
    collapsedDims.push_back(i);
    startIndexMap.push_back(i);
  }
  for (int64_t i = numIndicesDim; i < inputTensorType.getRank(); i++) {
    offsetDims.push_back(i + maxIndexRank - numIndicesDim);
  }
  auto dimsAttr = stablehlo::GatherDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*offsetDims=*/offsetDims,
      /*collapsedSliceDims=*/collapsedDims,
      /*operandBatchingDims=*/{},
      /*startIndicesBatchingDims=*/{},
      /*startIndexMap=*/startIndexMap,
      /*indexVectorDim=*/indexVecDim);
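  // Worked example (illustrative): indexing a rank-3 input with two index
  // tensors broadcast to rank 2 gives numIndicesDim = 2 and maxIndexRank = 2,
  // so collapsedDims = startIndexMap = [0, 1], offsetDims = [2], and the
  // gather result has rank 2 + (3 - 2) = 3.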

  SmallVector<int64_t> sliceSizes;
  auto inputShape = makeShapeTorchCompatible(inputTensorType.getShape());
  for (int64_t i = 0; i < inputTensorType.getRank(); ++i) {
    if (i < numIndicesDim) {
      sliceSizes.push_back(1);
    } else {
      sliceSizes.push_back(inputShape[i]);
    }
  }

  rewriter.replaceOpWithNewOp<stablehlo::GatherOp>(
      op, outType, input, gatherIndices, dimsAttr,
      rewriter.getDenseI64ArrayAttr(sliceSizes));
  return success();
}

// AtenIndexPutHackedTwinOp
// Convert to stablehlo::ScatterOp.
template <>
LogicalResult ConvertAtenOp<AtenIndexPutHackedTwinOp>::matchAndRewrite(
    AtenIndexPutHackedTwinOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op->getLoc();
  Value input = adaptor.getSelf();
  Value values = adaptor.getValues();
  auto outType =
      cast<RankedTensorType>(getTypeConverter()->convertType(op.getType()));
  auto inputType = cast<RankedTensorType>(input.getType());
  auto inputShape = inputType.getShape();
  auto inputRank = inputType.getRank();
  auto valuesType = cast<RankedTensorType>(values.getType());
  int64_t valueRank = valuesType.getRank();
  bool accumulate;
  if (!matchPattern(op.getAccumulate(), m_TorchConstantBool(&accumulate))) {
    return rewriter.notifyMatchFailure(op,
                                       "accumulate should be a constant bool");
  }
  Value indexList = op.getIndices();
  SmallVector<Value> indicesTorchType;
  if (!getListConstructElements(indexList, indicesTorchType))
    return op.emitError(
        "unimplemented: the tensor list is not from list construct");
  int64_t indexCnt = indicesTorchType.size();

  auto indexTensors = getTypeConvertedValues(rewriter, loc, getTypeConverter(),
                                             indicesTorchType);

  int maxIndexRank = -1;
  auto scatterIndicesInfo = broadcastAndConcatIndices(
      op, rewriter, indexTensors, options.dimSizeIndexBits, maxIndexRank);
  if (failed(scatterIndicesInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to generate broadcasted indices");
  }
  auto scatterIndices = *scatterIndicesInfo;

  // Broadcast the `values` tensor to the expected shape: the leading
  // maxIndexRank dims come from the broadcasted indices and the trailing
  // dims from the non-indexed dims of the input.
  SmallVector<int64_t> scatterIndicesDims;
  for (int64_t i = 0; i < maxIndexRank; ++i) {
    scatterIndicesDims.push_back(i);
  }
  auto expectedValuesShapeTensorInfo =
      hlo::getDimSizesOfTensor(rewriter, op, scatterIndices, scatterIndicesDims,
                               options.dimSizeIndexBits);
  if (failed(expectedValuesShapeTensorInfo)) {
    return rewriter.notifyMatchFailure(
        op, "failed to get shape of broadcasted indices");
  }
  auto expectedValuesShapeTensors = *expectedValuesShapeTensorInfo;
  SmallVector<int64_t> trailingInputDims;
  for (int64_t i = indexCnt; i < inputRank; ++i) {
    trailingInputDims.push_back(i);
  }
  auto trailingInputShapeTensorInfo = hlo::getDimSizesOfTensor(
      rewriter, op, input, trailingInputDims, options.dimSizeIndexBits);
  if (failed(trailingInputShapeTensorInfo)) {
    return rewriter.notifyMatchFailure(op, "failed to get shape of input");
  }
  expectedValuesShapeTensors.append((*trailingInputShapeTensorInfo).begin(),
                                    (*trailingInputShapeTensorInfo).end());

  llvm::ArrayRef<int64_t> scatterIndicesShape =
      cast<RankedTensorType>(scatterIndices.getType()).getShape();
  SmallVector<int64_t> expectedValuesShape(
      scatterIndicesShape.begin(), scatterIndicesShape.begin() + maxIndexRank);
  for (int64_t i = indexCnt; i < inputRank; i++) {
    expectedValuesShape.push_back(inputShape[i]);
  }
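  // For instance (illustrative): writing into a [1024, 768] destination
  // through a single index tensor of shape [1, 1024] gives indexCnt = 1 and
  // maxIndexRank = 2, so expectedValuesShape is [1, 1024, 768]. This is how
  // `values` whose rank exceeds the destination's rank are supported.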

  valuesType =
      RankedTensorType::get(expectedValuesShape, valuesType.getElementType());
  values =
      hlo::promoteAndBroadcast(rewriter, values, valuesType,
                               rewriter
                                   .create<tensor::FromElementsOp>(
                                       op->getLoc(), expectedValuesShapeTensors)
                                   .getResult());
  valueRank = valuesType.getRank();

  // Create the stablehlo::ScatterOp.
  int64_t indexVecDim = maxIndexRank;
  SmallVector<int64_t> scatterDimOperandDimMap;
  SmallVector<int64_t> insertedWindowDims;
  SmallVector<int64_t> updateWindowDims;
  for (int64_t i = 0; i < indexCnt; ++i) {
    scatterDimOperandDimMap.push_back(i);
    insertedWindowDims.push_back(i);
  }
  for (int64_t i = maxIndexRank; i < valueRank; ++i) {
    updateWindowDims.push_back(i);
  }
  auto scatterDimensionNumbers = stablehlo::ScatterDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*updateWindowDims=*/updateWindowDims,
      /*insertedWindowDims=*/insertedWindowDims,
      /*inputBatchingDims=*/{},
      /*scatterIndicesBatchingDims=*/{},
      /*scatterDimsToOperandDim=*/scatterDimOperandDimMap,
      /*indexVectorDim=*/indexVecDim);
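  // Unlike the scalar-update scatter earlier in this file, the trailing dims
  // of `values` (those past maxIndexRank) form an update window written into
  // the non-indexed trailing dims of the input.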

  auto stablehloScatterOp = rewriter.create<stablehlo::ScatterOp>(
      loc, outType, input, scatterIndices, values, scatterDimensionNumbers,
      false, false);

  // Configure the update computation region.
  Block &block = stablehloScatterOp.getUpdateComputation().emplaceBlock();
  // Add block arguments (the current element and the update element).
  auto blockArgumentType =
      RankedTensorType::get({}, inputType.getElementType());
  block.addArgument(blockArgumentType, loc);
  block.addArgument(blockArgumentType, loc);

  auto *lhsArg = block.args_begin();
  auto *rhsArg = std::next(lhsArg);

  {
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPointToStart(&block);
    if (!accumulate) {
      rewriter.create<stablehlo::ReturnOp>(loc, *rhsArg);
    } else {
      Value out = rewriter.create<stablehlo::AddOp>(loc, blockArgumentType,
                                                    *lhsArg, *rhsArg);
      rewriter.create<stablehlo::ReturnOp>(loc, out);
    }
  }

  rewriter.replaceOp(op, stablehloScatterOp.getResults());
  return success();
}

// AtenGridSamplerOp
// See
// https://github.com/pytorch/pytorch/blob/ec58f1f74ebcec744d2ab90ad34abd09c1018e92/torch/_decomp/decompositions.py#L3923-L4086
namespace {
template <typename T>
static Value getConstantLike(OpBuilder &b, Location loc, T constant,
                             Value val) {
  Type ty = getElementTypeOrSelf(val.getType());
  auto getAttr = [&]() -> Attribute {
    if (isa<mlir::IntegerType>(ty))
      return b.getIntegerAttr(ty, constant);
    if (isa<mlir::FloatType>(ty))
      return b.getFloatAttr(ty, constant);
    if (auto complexTy = dyn_cast<mlir::ComplexType>(ty))
      return complex::NumberAttr::get(complexTy, constant, 0);
    llvm_unreachable("unhandled element type");
  };
  return b.create<mlir::chlo::ConstantLikeOp>(loc, cast<TypedAttr>(getAttr()),
                                              val);
}

template <typename T>
static Value getConstTensor(ConversionPatternRewriter &rewriter, Operation *op,
                            ArrayRef<T> values, ArrayRef<int64_t> shape,
                            Type ty) {
  Location loc = op->getLoc();
  RankedTensorType valueType = RankedTensorType::get(shape, ty);
  auto valueAttr = DenseElementsAttr::get(valueType, values);
  return rewriter.create<stablehlo::ConstantOp>(loc, valueType, valueAttr);
}

template <typename T>
static Value getConstScalarTensor(ConversionPatternRewriter &rewriter,
                                  Operation *op, T value, Type ty) {
  return getConstTensor(rewriter, op, ArrayRef<T>{value}, {}, ty);
}

// Helper to lower AtenGridSamplerOp: map normalized [-1, 1] grid
// coordinates back to pixel coordinates.
static Value unnormalize(ConversionPatternRewriter &rewriter, Operation *op,
                         Value coords, int64_t size, Type elemTy,
                         bool alignCorners) {
  Location loc = op->getLoc();
  APFloat pointFive(cast<mlir::FloatType>(elemTy).getFloatSemantics(), "0.5");
  APFloat sizeFloat =
      APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), size);

  // double mul = alignCorners ? (size * 0.5 - 0.5) : (size * 0.5);
  // double ofs = size * 0.5 - 0.5;
  APFloat mul =
      alignCorners ? sizeFloat * pointFive - pointFive : sizeFloat * pointFive;
  APFloat ofs = sizeFloat * pointFive - pointFive;
  Value constMul = getConstScalarTensor(rewriter, op, mul, elemTy);
  Value constOfs = getConstScalarTensor(rewriter, op, ofs, elemTy);

  // Use chlo::BroadcastMulOp/BroadcastAddOp so the scalar constants
  // broadcast against the coordinate tensor: result = coords * mul + ofs.
  DenseI64ArrayAttr bcastDimensions;
  Value mulResult = rewriter.create<chlo::BroadcastMulOp>(loc, coords, constMul,
                                                          bcastDimensions);
  Value result = rewriter.create<chlo::BroadcastAddOp>(loc, mulResult, constOfs,
                                                       bcastDimensions);
  return result;
}
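// Sanity check of the affine map above (illustrative): with
// align_corners=true it is coords * (size - 1) / 2 + (size - 1) / 2,
// i.e. (coords + 1) * (size - 1) / 2; with align_corners=false it is
// coords * size / 2 + (size - 1) / 2, i.e. ((coords + 1) * size - 1) / 2,
// matching the PyTorch grid_sample decomposition.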

static Value computeCoordinates(ConversionPatternRewriter &rewriter,
                                Operation *op, Value coords, int64_t size,
                                Type elemTy, int64_t padding_mode) {
  // TODO: add support for padding_mode 1 (border) and 2 (reflection).
  // padding_mode 0 (zeros) needs no coordinate remapping here;
  // out-of-bounds samples are zeroed out later in clip().
  return coords;
}

static Value computeSourceIndex(ConversionPatternRewriter &rewriter,
                                Operation *op, Value coords, int64_t size,
                                Type elemTy, int64_t padding_mode,
                                bool alignCorners) {
  Value coordsUn =
      unnormalize(rewriter, op, coords, size, elemTy, alignCorners);
  return computeCoordinates(rewriter, op, coordsUn, size, elemTy, padding_mode);
}

// def in_bounds_cond(xs: Tensor, ys: Tensor) -> Tensor:
//     return torch.logical_and(
//         0 <= xs,
//         torch.logical_and(xs < iW, torch.logical_and(0 <= ys, ys < iH))
//     )
static Value inBoundsCond(ConversionPatternRewriter &rewriter, Operation *op,
                          Value xs, Value ys, int64_t ih, int64_t iw,
                          Type elemTy) {
  Location loc = op->getLoc();
  APFloat zeroFloat =
      APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), 0);
  Value zero = getConstScalarTensor(rewriter, op, zeroFloat, elemTy);
  APFloat iwFloat =
      APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), iw);
  APFloat ihFloat =
      APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), ih);

  Value iwFloatValue = getConstScalarTensor(rewriter, op, iwFloat, elemTy);
  Value ihFloatValue = getConstScalarTensor(rewriter, op, ihFloat, elemTy);

  chlo::ComparisonTypeAttr compareTypeAttr = chlo::ComparisonTypeAttr::get(
      rewriter.getContext(), chlo::ComparisonType::FLOAT);
  chlo::ComparisonDirectionAttr compareLTAttr =
      chlo::ComparisonDirectionAttr::get(rewriter.getContext(),
                                         chlo::ComparisonDirection::LT);
  chlo::ComparisonDirectionAttr compareGEAttr =
      chlo::ComparisonDirectionAttr::get(rewriter.getContext(),
                                         chlo::ComparisonDirection::GE);
  DenseI64ArrayAttr bcastDimensions;
  // 0 <= xs
  Value cond1 = rewriter.create<chlo::BroadcastCompareOp>(
      loc, xs, zero, bcastDimensions, compareGEAttr, compareTypeAttr);
  // xs < iW
  Value cond2 = rewriter.create<chlo::BroadcastCompareOp>(
      loc, xs, iwFloatValue, bcastDimensions, compareLTAttr, compareTypeAttr);
  // 0 <= ys
  Value cond3 = rewriter.create<chlo::BroadcastCompareOp>(
      loc, ys, zero, bcastDimensions, compareGEAttr, compareTypeAttr);
  // ys < iH
  Value cond4 = rewriter.create<chlo::BroadcastCompareOp>(
      loc, ys, ihFloatValue, bcastDimensions, compareLTAttr, compareTypeAttr);
  Value cond5 =
      rewriter.create<chlo::BroadcastAndOp>(loc, cond1, cond2, bcastDimensions);
  Value cond6 =
      rewriter.create<chlo::BroadcastAndOp>(loc, cond3, cond4, bcastDimensions);
  return rewriter.create<chlo::BroadcastAndOp>(loc, cond5, cond6,
                                               bcastDimensions);
}

// def clip(xs: Tensor, ys: Tensor, ws: Tensor) -> TensorSequenceType:
//     cond = in_bounds_cond(xs, ys)
//     # To clip to inside valid coordinates, we map the coordinates
//     # to (x, y) = (0, 0) and also set the weight to 0
//     # We also change the shape of the tensor to the appropriate one for
//     # broadcasting with N_idx, C_idx for the purposes of advanced indexing
//     c = C if _expand_grid else 1
//     return tuple(
//         torch.where(cond, t, 0).view(N, c, oH, oW)
//         for t in (xs.to(dtype=torch.int64), ys.to(dtype=torch.int64), ws)
//     )
SmallVector<Value> clip(ConversionPatternRewriter &rewriter, Operation *op,
                        Value xs, Value ys, Value ws, int64_t N, int64_t oH,
                        int64_t oW, int64_t iH, int64_t iW, Type elemTy) {
  Location loc = op->getLoc();
  auto indexElemTy = rewriter.getI64Type();
  auto indexTy = RankedTensorType::get(mlir::ArrayRef<int64_t>{1}, indexElemTy);

  Value zeroIntValue = rewriter.create<stablehlo::ConstantOp>(
      loc, indexTy, DenseIntElementsAttr::get(indexTy, ArrayRef<int64_t>{0}));

  APFloat zeroAPFloat =
      APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), 0);
  Value zeroFloatValue =
      getConstScalarTensor(rewriter, op, zeroAPFloat, elemTy);
  Value cond = inBoundsCond(rewriter, op, xs, ys, iH, iW, elemTy);
  Value xsInt = rewriter.create<stablehlo::ConvertOp>(loc, xs, indexElemTy);
  Value ysInt = rewriter.create<stablehlo::ConvertOp>(loc, ys, indexElemTy);

  Value selectXs = rewriter.create<chlo::BroadcastSelectOp>(
      loc, ArrayRef<Value>{cond, xsInt, zeroIntValue});
  Value selectYs = rewriter.create<chlo::BroadcastSelectOp>(
      loc, ArrayRef<Value>{cond, ysInt, zeroIntValue});
  Value selectWs = rewriter.create<chlo::BroadcastSelectOp>(
      loc, ArrayRef<Value>{cond, ws, zeroFloatValue});

  SmallVector<int64_t> sizes = {N, 1, oH, oW};
  Value reshapedXs = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(sizes, indexElemTy), selectXs);
  Value reshapedYs = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(sizes, indexElemTy), selectYs);
  Value reshapedWs = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(sizes, elemTy), selectWs);
  return SmallVector<Value>{reshapedXs, reshapedYs, reshapedWs};
}

Value getSummand(ConversionPatternRewriter &rewriter, Operation *op,
                 Value input, Value ix, Value iy, Value w, int64_t N,
                 int64_t oH, int64_t oW, int64_t iH, int64_t iW, Value Nidx,
                 Value CIdx, RankedTensorType outType, Type elemTy,
                 size_t dimSizeIndexBits) {
  Location loc = op->getLoc();
  auto inputTensorType = cast<RankedTensorType>(input.getType());
  SmallVector<Value> clipValues =
      clip(rewriter, op, ix, iy, w, N, oH, oW, iH, iW, elemTy);
  Value idxX = clipValues[0];
  Value idxY = clipValues[1];
  Value idxW = clipValues[2];
  SmallVector<Value> indexTensors{Nidx, CIdx, idxY, idxX};

  int maxIndexRank = -1;
  auto gatherIndicesInfo =
      broadcastAndConcatIndices(input.getDefiningOp(), rewriter, indexTensors,
                                dimSizeIndexBits, maxIndexRank);
  auto gatherIndices = *gatherIndicesInfo;
  int64_t numIndicesDim = indexTensors.size();
  int64_t indexVecDim = maxIndexRank;

  SmallVector<int64_t> offsetDims;
  SmallVector<int64_t> collapsedDims;
  SmallVector<int64_t> startIndexMap;
  for (int64_t i = 0; i < numIndicesDim; ++i) {
    collapsedDims.push_back(i);
    startIndexMap.push_back(i);
  }
  for (int64_t i = numIndicesDim; i < inputTensorType.getRank(); i++) {
    offsetDims.push_back(i + maxIndexRank - numIndicesDim);
  }
  auto dimsAttr = stablehlo::GatherDimensionNumbersAttr::get(
      rewriter.getContext(),
      /*offsetDims=*/offsetDims,
      /*collapsedSliceDims=*/collapsedDims,
      /*operandBatchingDims=*/{},
      /*startIndicesBatchingDims=*/{},
      /*startIndexMap=*/startIndexMap,
      /*indexVectorDim=*/indexVecDim);

  SmallVector<int64_t> sliceSizes;
  auto inputShape = makeShapeTorchCompatible(inputTensorType.getShape());
  for (int64_t i = 0; i < inputTensorType.getRank(); ++i) {
    if (i < numIndicesDim) {
      sliceSizes.push_back(1);
    } else {
      sliceSizes.push_back(inputShape[i]);
    }
  }

  Value gather = rewriter.create<stablehlo::GatherOp>(
      loc, input, gatherIndices, dimsAttr,
      rewriter.getDenseI64ArrayAttr(sliceSizes));
  // Use chlo::BroadcastMulOp to weight the gathered values by idxW.
  DenseI64ArrayAttr bcastDimensions;
  return rewriter.create<chlo::BroadcastMulOp>(loc, gather, idxW,
                                               bcastDimensions);
}

} // namespace
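
// The AtenGridSamplerOp lowering below uses getSummand to gather
// input[n, c, iy, ix] for every output location and scale it by the
// interpolation weight: bilinear mode sums four such terms, one per
// neighboring corner, while nearest mode uses a single term with weight 1.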
template <>
LogicalResult ConvertAtenOp<AtenGridSamplerOp>::matchAndRewrite(
    AtenGridSamplerOp op, OpAdaptor adaptor,
    ConversionPatternRewriter &rewriter) const {
  Location loc = op->getLoc();
  Value input = adaptor.getInput();
  Value grid = adaptor.getGrid();

  int64_t interpolationMode;
  if (!matchPattern(op.getInterpolationMode(),
                    m_TorchConstantInt(&interpolationMode)))
    return rewriter.notifyMatchFailure(
        op, "interpolation_mode must be an integer constant");
  int64_t paddingMode;
  if (!matchPattern(op.getPaddingMode(), m_TorchConstantInt(&paddingMode)))
    return rewriter.notifyMatchFailure(
        op, "padding_mode must be an integer constant");

  if (interpolationMode != 0 && interpolationMode != 1)
    return rewriter.notifyMatchFailure(
        op, "only support interpolation_mode = 0 (bilinear) or 1 (nearest)");

  if (paddingMode != 0)
    return rewriter.notifyMatchFailure(op,
                                       "only support paddingMode = 0 (zeros)");

  bool alignCorners = false;
  if (!matchPattern(op.getAlignCorners(), m_TorchConstantBool(&alignCorners)))
    return rewriter.notifyMatchFailure(
        op, "alignCorners must be a boolean constant");

  RankedTensorType inputTy = cast<RankedTensorType>(input.getType());
  RankedTensorType gridTy = cast<RankedTensorType>(grid.getType());
  RankedTensorType outTy =
      cast<RankedTensorType>(getTypeConverter()->convertType(op.getType()));
  Type elemTy = inputTy.getElementType();
  if (inputTy.getRank() != 4)
    return rewriter.notifyMatchFailure(op, "input must be a 4D tensor");
  if (gridTy.getRank() != 4)
    return rewriter.notifyMatchFailure(op, "grid must be a 4D tensor");

  auto inputSize = inputTy.getShape();
  auto gridSize = gridTy.getShape();
  int64_t N = inputSize[0];
  int64_t C = inputSize[1];
  int64_t iH = inputSize[2];
  int64_t iW = inputSize[3];
  int64_t oH = gridSize[1];
  int64_t oW = gridSize[2];
  // grid is a 4D tensor with shape (N, oH, oW, 2).

  Type indexElemTy = rewriter.getI64Type();
  RankedTensorType indexTy =
      RankedTensorType::get(mlir::ArrayRef<int64_t>{1}, indexElemTy);
  Value constN = rewriter.create<stablehlo::ConstantOp>(
      loc, indexTy, DenseIntElementsAttr::get(indexTy, {N}));
  Value constC = rewriter.create<stablehlo::ConstantOp>(
      loc, indexTy, DenseIntElementsAttr::get(indexTy, {C}));
  APFloat one = APFloat(cast<mlir::FloatType>(elemTy).getFloatSemantics(), 1);

  Value constOneFloat = getConstScalarTensor(rewriter, op, one, elemTy);

  auto NidxFlatten = rewriter.create<stablehlo::DynamicIotaOp>(
      loc, RankedTensorType::get(mlir::ArrayRef<int64_t>{N}, indexElemTy),
      constN, 0);
  auto CidxFlatten = rewriter.create<stablehlo::DynamicIotaOp>(
      loc, RankedTensorType::get(mlir::ArrayRef<int64_t>{C}, indexElemTy),
      constC, 0);

  // Reshape NidxFlatten to a 4D tensor (N, 1, 1, 1).
  auto NidxSizes = mlir::SmallVector<int64_t>{N, 1, 1, 1};
  auto Nidx = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(NidxSizes, indexElemTy), NidxFlatten);

  // Reshape CidxFlatten to a 4D tensor (1, C, 1, 1).
  auto CidxSizes = mlir::SmallVector<int64_t>{1, C, 1, 1};
  auto Cidx = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(CidxSizes, indexElemTy), CidxFlatten);

  // Split grid into its x and y channels.
  llvm::SmallVector<int64_t> stride(4, 1);
  auto gridX = rewriter.create<stablehlo::SliceOp>(
      loc,
      RankedTensorType::get(mlir::SmallVector<int64_t>{N, oH, oW, 1},
                            gridTy.getElementType()),
      grid, mlir::SmallVector<int64_t>{0, 0, 0, 0},
      mlir::SmallVector<int64_t>{N, oH, oW, 1}, stride);
  auto gridY = rewriter.create<stablehlo::SliceOp>(
      loc,
      RankedTensorType::get(mlir::SmallVector<int64_t>{N, oH, oW, 1},
                            gridTy.getElementType()),
      grid, mlir::SmallVector<int64_t>{0, 0, 0, 1},
      mlir::SmallVector<int64_t>{N, oH, oW, 2}, stride);
  // Squeeze the last dimension.
  auto gridXshape = mlir::SmallVector<int64_t>{N, oH, oW};

  auto gridXReshape = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(gridXshape, gridTy.getElementType()), gridX);
  auto gridYReshape = rewriter.create<stablehlo::ReshapeOp>(
      loc, RankedTensorType::get(gridXshape, gridTy.getElementType()), gridY);
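  // Nidx of shape (N, 1, 1, 1) and Cidx of shape (1, C, 1, 1) broadcast
  // against the (N, 1, oH, oW) pixel indices inside
  // broadcastAndConcatIndices, so each gathered element carries its full
  // (n, c, y, x) coordinate.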

  if (interpolationMode == 0) {
    Value ix = computeSourceIndex(rewriter, op, gridXReshape, iW, elemTy,
                                  paddingMode, alignCorners);
    Value iy = computeSourceIndex(rewriter, op, gridYReshape, iH, elemTy,
                                  paddingMode, alignCorners);
    Value ix_nw = rewriter.create<stablehlo::FloorOp>(loc, ix);
    Value iy_nw = rewriter.create<stablehlo::FloorOp>(loc, iy);

    DenseI64ArrayAttr bcastDimensions;
    Value ix_ne = rewriter.create<chlo::BroadcastAddOp>(
        loc, ix_nw, constOneFloat, bcastDimensions);
    Value iy_ne = iy_nw;
    Value ix_sw = ix_nw;
    Value iy_sw = rewriter.create<chlo::BroadcastAddOp>(
        loc, iy_nw, constOneFloat, bcastDimensions);
    Value ix_se = ix_ne;
    Value iy_se = iy_sw;

    // w_nw = (ix_se - ix) * (iy_se - iy)
    // w_ne = (ix - ix_sw) * (iy_sw - iy)
    // w_sw = (ix_ne - ix) * (iy - iy_ne)
    // w_se = (ix - ix_nw) * (iy - iy_nw)
    Value w_nw = rewriter.create<chlo::BroadcastMulOp>(
        loc,
        rewriter.create<chlo::BroadcastSubOp>(loc, ix_se, ix, bcastDimensions),
        rewriter.create<chlo::BroadcastSubOp>(loc, iy_se, iy, bcastDimensions),
        bcastDimensions);
    Value w_ne = rewriter.create<chlo::BroadcastMulOp>(
        loc,
        rewriter.create<chlo::BroadcastSubOp>(loc, ix, ix_sw, bcastDimensions),
        rewriter.create<chlo::BroadcastSubOp>(loc, iy_sw, iy, bcastDimensions),
        bcastDimensions);
    Value w_sw = rewriter.create<chlo::BroadcastMulOp>(
        loc,
        rewriter.create<chlo::BroadcastSubOp>(loc, ix_ne, ix, bcastDimensions),
        rewriter.create<chlo::BroadcastSubOp>(loc, iy, iy_ne, bcastDimensions),
        bcastDimensions);
    Value w_se = rewriter.create<chlo::BroadcastMulOp>(
        loc,
        rewriter.create<chlo::BroadcastSubOp>(loc, ix, ix_nw, bcastDimensions),
        rewriter.create<chlo::BroadcastSubOp>(loc, iy, iy_nw, bcastDimensions),
        bcastDimensions);
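    // The four weights are the standard bilinear coefficients: each equals
    // the area of the unit-cell rectangle opposite its corner, so
    // w_nw + w_ne + w_sw + w_se == 1 whenever (ix, iy) lies inside the cell
    // spanned by the floored coordinates.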

    Value summand_nw =
        getSummand(rewriter, op, input, ix_nw, iy_nw, w_nw, N, oH, oW, iH, iW,
                   Nidx, Cidx, outTy, elemTy, options.dimSizeIndexBits);
    Value summand_ne =
        getSummand(rewriter, op, input, ix_ne, iy_ne, w_ne, N, oH, oW, iH, iW,
                   Nidx, Cidx, outTy, elemTy, options.dimSizeIndexBits);
    Value summand_sw =
        getSummand(rewriter, op, input, ix_sw, iy_sw, w_sw, N, oH, oW, iH, iW,
                   Nidx, Cidx, outTy, elemTy, options.dimSizeIndexBits);
    Value summand_se =
        getSummand(rewriter, op, input, ix_se, iy_se, w_se, N, oH, oW, iH, iW,
                   Nidx, Cidx, outTy, elemTy, options.dimSizeIndexBits);

    // result = summand_nw + summand_ne + summand_sw + summand_se
    Value sum = rewriter.create<stablehlo::AddOp>(loc, summand_nw, summand_ne);
    sum = rewriter.create<stablehlo::AddOp>(loc, sum, summand_sw);
    sum = rewriter.create<stablehlo::AddOp>(loc, sum, summand_se);
    rewriter.replaceOp(op, sum);
  } else if (interpolationMode == 1) {
    Value ix = computeSourceIndex(rewriter, op, gridXReshape, iW, elemTy,
                                  paddingMode, alignCorners);
    Value iy = computeSourceIndex(rewriter, op, gridYReshape, iH, elemTy,
                                  paddingMode, alignCorners);
    Value ix_round = rewriter.create<stablehlo::RoundOp>(loc, ix);
    Value iy_round = rewriter.create<stablehlo::RoundOp>(loc, iy);
    Value oneTensor = getConstantLike(rewriter, loc, 1.0, ix_round);
    Value summand = getSummand(rewriter, op, input, ix_round, iy_round,
                               oneTensor, N, oH, oW, iH, iW, Nidx, Cidx, outTy,
                               elemTy, options.dimSizeIndexBits);
    rewriter.replaceOp(op, summand);
  }
  return success();
}

void mlir::torch::torch_to_stablehlo::
    populateGatherScatterOpPatternsAndLegality(
        TypeConverter &typeConverter, RewritePatternSet &patterns,
        ConversionTarget &target, const TorchToStablehloOptions &options) {
  MLIRContext *context = patterns.getContext();

#define INSERT_ATENOP_PATTERN(AtenOp)                                          \
  target.addIllegalOp<AtenOp>();                                               \
  patterns.add<ConvertAtenOp<AtenOp>>(typeConverter, context, options)
  INSERT_ATENOP_PATTERN(AtenEmbeddingOp);
  INSERT_ATENOP_PATTERN(AtenEmbeddingBagPaddingIdxOp);
  INSERT_ATENOP_PATTERN(AtenIndexSelectOp);
  INSERT_ATENOP_PATTERN(AtenGatherOp);
  INSERT_ATENOP_PATTERN(AtenSliceScatterOp);
  INSERT_ATENOP_PATTERN(AtenIndexTensorHackedTwinOp);
  INSERT_ATENOP_PATTERN(AtenIndexPutHackedTwinOp);
  INSERT_ATENOP_PATTERN(AtenGridSamplerOp);
#undef INSERT_ATENOP_PATTERN

#define INSERT_ATEN_SCATTER_PATTERN(AtenOp, reduceType)                        \
  target.addIllegalOp<AtenOp>();                                               \
  patterns.add<ConvertAtenScatterOp<AtenOp, reduceType>>(typeConverter,        \
                                                         context, options)
  INSERT_ATEN_SCATTER_PATTERN(AtenScatterSrcOp, 0); // 0 for None reduce op
  INSERT_ATEN_SCATTER_PATTERN(AtenScatterAddOp, 1); // 1 for Add reduce op
#undef INSERT_ATEN_SCATTER_PATTERN
}