//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // Also available under a BSD-style license. See LICENSE. // //===----------------------------------------------------------------------===// #include "PassDetail.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "torch-mlir/Dialect/Torch/IR/TorchDialect.h" #include "torch-mlir/Dialect/Torch/IR/TorchOps.h" #include "torch-mlir/Dialect/Torch/IR/TorchTypes.h" #include "torch-mlir/Dialect/Torch/Transforms/Passes.h" #include "torch-mlir/Dialect/Torch/Utils/Utils.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include #include using namespace mlir; using namespace mlir::torch; using namespace mlir::torch::Torch; // Helper function to check whether the `dtype` is None or Float type. static bool isNoneOrFloatDtype(MLIRContext *context, Value dtype) { if (isa(dtype.getType())) return true; int64_t dtypeInt; if (!matchPattern(dtype, m_TorchConstantInt(&dtypeInt))) return false; FailureOr resDtype = getTypeForScalarType(context, (torch_upstream::ScalarType)dtypeInt); if (failed(resDtype)) return false; return resDtype->isa(); } // Helper function to compute the return type of the reduction function. // `dim` specifies the dimension to reduce and `keepDim` preserves the rank of // the input tensor. 
static Type computeReductionType(PatternRewriter &rewriter, Operation *op, BaseTensorType tensorType, Value dim, bool keepDim) { SmallVector sizes; int64_t dimInt; if (tensorType.hasSizes()) { ArrayRef inputShape = tensorType.getSizes(); int64_t inputRank = inputShape.size(); if (matchPattern(dim, m_TorchConstantInt(&dimInt))) { dimInt = toPositiveDim(dimInt, inputRank); if (!isValidDim(dimInt, inputRank)) { (void)rewriter.notifyMatchFailure(op, "dim is not a valid dim"); return nullptr; } sizes.append(inputShape.begin(), inputShape.end()); // The dimension to be reduced is set to 1 when `keepDim` is true else it // is removed. if (keepDim) sizes[dimInt] = 1; else sizes.erase(sizes.begin() + dimInt); } else { unsigned reducedRank = keepDim ? inputRank : inputRank - 1; sizes.resize(reducedRank, kUnknownSize); } } Type resultType = tensorType.getWithSizesAndDtype( !tensorType.hasSizes() ? std::optional>() : llvm::ArrayRef(sizes), tensorType.getOptionalDtype()); return resultType; } // Reduction function to calculate sum along given `dim`. static Value createSumAlongDimension(PatternRewriter &rewriter, Location loc, Operation *op, Value input, Value dim, bool keepDim) { Value dimList = rewriter.create( loc, Torch::ListType::get(dim.getType()), dim); Value keepDimCst = rewriter.create(loc, keepDim); Value dtype = rewriter.create(loc); Type resultType = computeReductionType( rewriter, op, cast(input.getType()), dim, keepDim); if (!resultType) return nullptr; return rewriter.create(loc, resultType, input, dimList, keepDimCst, dtype); } // Reduction function to calculate max along given `dim`. 
// Emits `aten.max.dim` over `dim` and returns its `values` result.
// Returns a null Value (without creating any op) when the reduced type cannot
// be computed; `computeReductionType` has then already reported the reason.
static Value createMaxAlongDimension(PatternRewriter &rewriter, Location loc,
                                     Operation *op, Value input, Value dim,
                                     bool keepDim) {
  // `computeReductionType` may return a null Type. A plain cast on a null
  // Type asserts, so use the null-tolerant form to keep the check reachable.
  auto valueType = dyn_cast_or_null<BaseTensorType>(computeReductionType(
      rewriter, op, cast<BaseTensorType>(input.getType()), dim, keepDim));
  if (!valueType)
    return nullptr;

  // The indices result has the same shape as the values, with si64 elements.
  std::optional<ArrayRef<int64_t>> indexSizes;
  if (valueType.hasSizes())
    indexSizes = valueType.getSizes();
  auto indexType = cast<BaseTensorType>(valueType.getWithSizesAndDtype(
      indexSizes, IntegerType::get(op->getContext(), 64, IntegerType::Signed)));

  Value keepDimCst = rewriter.create<ConstantBoolOp>(loc, keepDim);
  return rewriter
      .create<AtenMaxDimOp>(loc, valueType, indexType, input, dim, keepDimCst)
      .getValues();
}

// Helper for creating `aten::sub_tensor_op`.
// Emits `lhs - 1.0 * rhs` with result type `tensorType`.
static Value createTensorSub(PatternRewriter &rewriter, Location loc,
                             Type tensorType, Value lhs, Value rhs) {
  Value alpha =
      rewriter.create<ConstantFloatOp>(loc, rewriter.getF64FloatAttr(1));
  return rewriter.create<AtenSubTensorOp>(loc, tensorType, lhs, rhs, alpha);
}

// Share code between `softmax_backward` and `log_softmax_backward` ops.
// Returns x - y * sum(z, dim).
static Value createSoftmaxBackwardCommonKernel(PatternRewriter &rewriter, Location loc, Operation *op, Type tensorType, Value x, Value y, Value z, Value dim) { Value sum = createSumAlongDimension(rewriter, loc, op, z, dim, /*keepDim=*/true); if (!sum) return nullptr; auto broadcastSizeType = Torch::ListType::get(Torch::IntType::get(op->getContext())); Value broadcastSize = rewriter.create(loc, broadcastSizeType, z); Value sumBroadcast = rewriter.create(loc, tensorType, sum, broadcastSize); Value temp = rewriter.create(loc, tensorType, y, sumBroadcast); Value sub = createTensorSub(rewriter, loc, tensorType, x, temp); return sub; } static SmallVector computeDimsOrderForMoveDim(int64_t srcDimInt, int64_t dstDimInt, unsigned inputRank) { llvm::iota_range dimsOrderIR(0, inputRank, /*inclusive=*/false); SmallVector dimsOrder(dimsOrderIR.begin(), dimsOrderIR.end()); dimsOrder.erase(dimsOrder.begin() + srcDimInt); dimsOrder.insert(dimsOrder.begin() + dstDimInt, srcDimInt); return dimsOrder; } static bool rewriteEquationWithEllipsisSlicing(std::string &equation, SmallVector &inputRanks) { // split equation into input and result size_t arrowPos = equation.find("->"); if (arrowPos == std::string::npos) { return false; } std::string inputStr = equation.substr(0, arrowPos); std::string resultStr = equation.substr(arrowPos + 2); // split input into tokens SmallVector inputTokens; size_t start = 0; size_t end = 0; std::set usedTokens; while (end < inputStr.size()) { end = inputStr.find(",", start); if (end == std::string::npos) { end = inputStr.size(); } std::string token = inputStr.substr(start, end - start); inputTokens.push_back(token); start = end + 1; } if (inputTokens.size() != inputRanks.size()) { return false; } // find the rank which ellipsis represents, and max ellipsis rank because a // tensor can be broadcasted SmallVector ellipsisRanks; int maxEllipsisRank = 0; for (const auto &[token, inputRank] : llvm::zip(inputTokens, inputRanks)) { int explictRank = 0; for (auto 
c : token) { if (std::isalpha(c)) { usedTokens.insert(c); explictRank++; } else if (c == '.' || c == ' ') { continue; } else { return false; } } int ellipsisRank = inputRank - explictRank; if (ellipsisRank > maxEllipsisRank) { maxEllipsisRank = ellipsisRank; } if (ellipsisRank < 0) { return false; } ellipsisRanks.push_back(inputRank - explictRank); } auto isTokenUsed = [&usedTokens](char c) { return usedTokens.find(c) != usedTokens.end(); }; std::string ellipsisToken; int usedCount = 0; // Iterate over the alphabet to create a new token for ellipsis for (char c = 'a'; c <= 'z'; ++c) { if (!isTokenUsed(c)) { ellipsisToken.push_back(c); usedCount++; if (usedCount == maxEllipsisRank) { break; } } } // replace ellipsis with ellipsisToken for (size_t i = 0; i < inputTokens.size(); i++) { size_t ellipsisPos = inputTokens[i].find("..."); if (ellipsisPos == std::string::npos) { continue; } if (ellipsisRanks[i] == maxEllipsisRank) { inputTokens[i].replace(ellipsisPos, 3, ellipsisToken); } else if (ellipsisRanks[i] == 0) { inputTokens[i].replace(ellipsisPos, 3, ""); } else { inputTokens[i].replace( ellipsisPos, 3, ellipsisToken.substr(ellipsisToken.size() - ellipsisRanks[i])); } } // replace ellipsis in result size_t ellipsisPos = resultStr.find("..."); if (ellipsisPos != std::string::npos) { resultStr.replace(ellipsisPos, 3, ellipsisToken); } // join input and result equation = llvm::join(inputTokens, ",") + " -> " + resultStr; return true; } static bool parseEquation(const std::string &equation, SmallVector> &inputTokens, SmallVector &resultTokens) { SmallVector inputToken; size_t index = 0; enum EquationVariable { kIsInput, kIsResult }; EquationVariable currentVariable = kIsInput; while (index < equation.size()) { if (std::isalpha(equation[index])) { if (currentVariable == kIsInput) { inputToken.push_back(equation[index]); } else { resultTokens.push_back(equation[index]); } } else if (equation[index] == ',') { inputTokens.push_back(inputToken); inputToken.clear(); } else 
if ((index < (equation.size() - 1)) && (equation.substr(index, 2).find("->") != std::string::npos)) { inputTokens.push_back(inputToken); inputToken.clear(); currentVariable = kIsResult; index++; } else if (equation[index] != ' ') { return false; } index++; } return true; } // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *lhsContractingDims] => // [batchingDimsProd, lhsOtherDimsProd, lhsContractingDimsProd] static Value collapseDimForMatmul(PatternRewriter &rewriter, Location loc, Value input, int64_t batchDimsLength, int64_t contractingDimsLength, int64_t otherDimsLength, int64_t reduceDimsLength, bool isLhs) { auto inputType = cast(input.getType()); auto inputRank = batchDimsLength + contractingDimsLength + otherDimsLength + reduceDimsLength; SmallVector inputShapeTensor; for (auto i = 0; i < inputRank; ++i) { inputShapeTensor.emplace_back(rewriter.create( loc, input, rewriter.create(loc, rewriter.getI64IntegerAttr(i)))); } SmallVector outShapeTensor; Value constOne = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); auto dimOffset = 0; auto appendDims = [&](int64_t dimLength) { Value prod = constOne; for (auto i = 0; i < dimLength; ++i) { prod = rewriter.create(loc, prod, inputShapeTensor[i + dimOffset]); } outShapeTensor.emplace_back(prod); dimOffset += dimLength; }; appendDims(batchDimsLength); if (!isLhs) appendDims(contractingDimsLength); appendDims(otherDimsLength + reduceDimsLength); if (isLhs) appendDims(contractingDimsLength); auto outShapeValue = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(input.getContext())), outShapeTensor); auto outType = inputType.getWithSizesAndDtype(std::nullopt, inputType.getOptionalDtype()); return rewriter.create(loc, outType, input, outShapeValue); } // classify every dim token into different categories. Note that although we // parse out reduce dims, we delay their execution until // `performLastPermuteAndReduce`. 
static void parseDimTokens( SmallVector &lhsTokens, SmallVector &rhsTokens, SmallVector &finalResultTokens, SmallVector &contractingDims, SmallVector &lhsReduceDims, SmallVector &rhsReduceDims, SmallVector &batchingDims, SmallVector &lhsOtherDims, SmallVector &rhsOtherDims) { llvm::SmallDenseSet lhsTokenSet(lhsTokens.begin(), lhsTokens.end()); llvm::SmallDenseSet rhsTokenSet(rhsTokens.begin(), rhsTokens.end()); llvm::SmallDenseSet finalResultTokenSet(finalResultTokens.begin(), finalResultTokens.end()); for (size_t i = 0; i < lhsTokens.size(); ++i) { bool rhsContains = rhsTokenSet.contains(lhsTokens[i]); bool finalResultConatins = finalResultTokenSet.contains(lhsTokens[i]); // batching dim if (rhsContains && finalResultConatins) { batchingDims.push_back(lhsTokens[i]); // reduce dim of lhs } else if (!rhsContains && !finalResultConatins) { lhsReduceDims.push_back(lhsTokens[i]); // other dim of lhs } else if (finalResultConatins) { lhsOtherDims.push_back(lhsTokens[i]); // contracting dim of lhs } else if (rhsContains) { contractingDims.push_back(lhsTokens[i]); } } for (size_t i = 0; i < rhsTokens.size(); ++i) { bool lhsContains = lhsTokenSet.contains(rhsTokens[i]); bool finalResultConatins = finalResultTokenSet.contains(rhsTokens[i]); // batching dim if (lhsContains && finalResultConatins) { // reduce dim of rhs } else if (!lhsContains && !finalResultConatins) { rhsReduceDims.push_back(rhsTokens[i]); // other dim of rhs } else if (finalResultConatins) { rhsOtherDims.push_back(rhsTokens[i]); // contracting dim of rhs } else if (lhsContains) { } } } static void generateIdealReusltDimTokens(SmallVector &batchingDims, SmallVector &lhsOtherDims, SmallVector &rhsOtherDims, SmallVector &lhsReduceDims, SmallVector &rhsReduceDims, SmallVector &resultTokens) { // generate ideal result dims, i.e., // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *rhsOtherDims, // *rhsReduceDims] resultTokens.insert(resultTokens.end(), batchingDims.begin(), batchingDims.end()); 
resultTokens.insert(resultTokens.end(), lhsOtherDims.begin(), lhsOtherDims.end()); resultTokens.insert(resultTokens.end(), lhsReduceDims.begin(), lhsReduceDims.end()); resultTokens.insert(resultTokens.end(), rhsOtherDims.begin(), rhsOtherDims.end()); resultTokens.insert(resultTokens.end(), rhsReduceDims.begin(), rhsReduceDims.end()); } static Value permuteTensorForMatmul(PatternRewriter &rewriter, Location loc, Value input, SmallVector &dimTokens, SmallVector &batchingDims, SmallVector &contractingDims, SmallVector &otherDims, SmallVector &reduceDims, bool isLhs) { auto inputType = cast(input.getType()); llvm::SmallDenseMap dimTokenMap; for (size_t idx = 0; idx < dimTokens.size(); ++idx) { dimTokenMap[dimTokens[idx]] = idx; } SmallVector permuteVec; auto appendDims = [&](SmallVector dimTokens) { for (auto d : dimTokens) { permuteVec.push_back(rewriter.create( loc, rewriter.getI64IntegerAttr(dimTokenMap[d]))); } }; appendDims(batchingDims); if (!isLhs) appendDims(contractingDims); appendDims(otherDims); appendDims(reduceDims); if (isLhs) appendDims(contractingDims); Value dstDims = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(rewriter.getContext())), permuteVec); auto outType = inputType.getWithSizesAndDtype(std::nullopt, inputType.getOptionalDtype()); return rewriter.create(loc, outType, input, dstDims); } static LogicalResult performMatmul(PatternRewriter &rewriter, Location loc, Value lhs, SmallVector &lhsTokens, Value rhs, SmallVector &rhsTokens, Value &result, SmallVector &resultTokens, SmallVector &finalResultTokens) { auto lhsType = cast(lhs.getType()); auto rhsType = cast(rhs.getType()); Type outputDType = lhsType.hasDtype() ? 
lhsType.getOptionalDtype() : rhsType.getOptionalDtype(); llvm::SmallDenseMap lhsDimShapeMap; for (size_t idx = 0; idx < lhsTokens.size(); ++idx) { char d = lhsTokens[idx]; lhsDimShapeMap[d] = rewriter.create( loc, lhs, rewriter.create(loc, rewriter.getI64IntegerAttr(idx))); } llvm::SmallDenseMap rhsDimShapeMap; for (size_t idx = 0; idx < rhsTokens.size(); ++idx) { char d = rhsTokens[idx]; rhsDimShapeMap[d] = rewriter.create( loc, rhs, rewriter.create(loc, rewriter.getI64IntegerAttr(idx))); } // parse batch, contracting, other, reduce dims of lhs and rhs SmallVector contractingDims; SmallVector lhsReduceDims; SmallVector rhsReduceDims; SmallVector lhsOtherDims; SmallVector rhsOtherDims; SmallVector batchingDims; parseDimTokens(lhsTokens, rhsTokens, finalResultTokens, contractingDims, lhsReduceDims, rhsReduceDims, batchingDims, lhsOtherDims, rhsOtherDims); llvm::SmallDenseMap outDimShapeMap; auto generateOutDimShapeMap = [&](SmallVector &dims) { for (auto d : dims) { bool lhsContains = lhsDimShapeMap.count(d) > 0; bool rhsContains = rhsDimShapeMap.count(d) > 0; if (lhsContains && rhsContains) { outDimShapeMap[d] = rewriter.create( loc, lhsDimShapeMap[d], rhsDimShapeMap[d]); } else if (lhsContains) { outDimShapeMap[d] = lhsDimShapeMap[d]; } else if (rhsContains) { outDimShapeMap[d] = rhsDimShapeMap[d]; } } }; generateOutDimShapeMap(contractingDims); generateOutDimShapeMap(batchingDims); generateOutDimShapeMap(lhsReduceDims); generateOutDimShapeMap(rhsReduceDims); generateOutDimShapeMap(lhsOtherDims); generateOutDimShapeMap(rhsOtherDims); if (contractingDims.size() == 0 && lhsOtherDims.size() == 0 && rhsOtherDims.size() == 0) { return rewriter.notifyMatchFailure( loc, "Hadamard product is currently not supported"); } // shape: [*batchingDims, *lhsOtherDims, *lhsReduceDims, *lhsContractingDims] lhs = permuteTensorForMatmul(rewriter, loc, lhs, lhsTokens, batchingDims, contractingDims, lhsOtherDims, lhsReduceDims, true); // shape: [*batchingDims, *rhsContractingDims, 
*rhsOtherDims, *rhsReduceDims] rhs = permuteTensorForMatmul(rewriter, loc, rhs, rhsTokens, batchingDims, contractingDims, rhsOtherDims, rhsReduceDims, false); // shape: [batchingDimsProd, lhsOtherDimsProd, lhsContractingDimsProd] lhs = collapseDimForMatmul(rewriter, loc, lhs, batchingDims.size(), contractingDims.size(), lhsOtherDims.size(), lhsReduceDims.size(), true); // shape: [batchingDimsProd, rhsContractingDimsProd, rhsOtherDimsProd] rhs = collapseDimForMatmul(rewriter, loc, rhs, batchingDims.size(), contractingDims.size(), rhsOtherDims.size(), rhsReduceDims.size(), false); // perform matmul auto outType = lhsType.getWithSizesAndDtype(std::nullopt, outputDType); result = rewriter.create(loc, outType, lhs, rhs); // generate ideal result dims. generateIdealReusltDimTokens(batchingDims, lhsOtherDims, rhsOtherDims, lhsReduceDims, rhsReduceDims, resultTokens); // reshape matmul result to ideal shape: // [batchingDimsProd, lhsOtherDimsProd, rhsOtherDimsProd] => // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *rhsOtherDims, // *rhsReduceDims] SmallVector outShapeTensors; for (char d : resultTokens) { outShapeTensors.emplace_back(outDimShapeMap[d]); } auto outResultShape = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(lhs.getContext())), outShapeTensors); result = rewriter.create( loc, lhsType.getWithSizesAndDtype(std::nullopt, outputDType), result, outResultShape); return success(); } static Value performLastReduceAndPermute(PatternRewriter &rewriter, Location loc, Type outType, Value input, SmallVector &inputTokens, SmallVector &outTokens) { auto inputType = cast(input.getType()); llvm::SmallDenseSet outTokenSet(outTokens.begin(), outTokens.end()); SmallVector sumDims; llvm::SmallDenseMap inputDimToIdx; int64_t idx = 0; for (size_t i = 0; i < inputTokens.size(); ++i) { char d = inputTokens[i]; if (!outTokenSet.contains(d)) { sumDims.emplace_back(i); } else { inputDimToIdx[d] = idx++; } } if (sumDims.size() > 0) { SmallVector sumDimsTensor; for 
(auto d : sumDims) { sumDimsTensor.emplace_back(rewriter.create( loc, rewriter.getI64IntegerAttr(d))); } auto sumDimsListValue = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(rewriter.getContext())), sumDimsTensor); auto falseValue = rewriter.create( loc, rewriter.getBoolAttr(false)); auto noneValue = rewriter.create(loc); input = rewriter.create( loc, inputType.getWithSizesAndDtype(std::nullopt, inputType.getOptionalDtype()), input, sumDimsListValue, falseValue, noneValue); } SmallVector permuteDimsTensor; for (auto d : outTokens) { permuteDimsTensor.emplace_back(rewriter.create( loc, rewriter.getI64IntegerAttr(inputDimToIdx[d]))); } auto permuteDimsListValue = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(input.getContext())), permuteDimsTensor); auto out = rewriter.create(loc, outType, input, permuteDimsListValue); return out; } namespace { /// We decompose aten.amax into a set of aten.max.dim op(s) depending on the /// number of dimensions across which the max needs to be computed. /// Eg: /// INPUT: /// final_output = aten.amax(initial_input, dim=(0, 2, 1), keepdim=False) /// /// OUTPUT: /// input_1 = aten.max.dim(initial_input, 2, keepdim) #1 /// input_2 = aten.max.dim(input_1, 1, keepdim) #2 /// final_output = aten.max.dim(input_2, 0, keepdim) #3 /// /// NOTE: We iterate over, in reverse order, every dimension included in `dim` /// of the `aten.amax` op and create an `aten.amax.dim` op. /// Input tensor to the next `aten.amax.dim` op is thus the output of the /// previous `aten.amax.dim` op. 
class DecomposeAtenAmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenAmaxOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); SmallVector dims; if (!matchPattern(op.getDim(), m_TorchListOfConstantInts(dims))) return rewriter.notifyMatchFailure(op, "non-const dim parameter unsupported"); bool keepDim; if (!matchPattern(op.getKeepdim(), m_TorchConstantBool(&keepDim))) return rewriter.notifyMatchFailure( op, "Expected a constant boolean value for keepDim"); Value input = op.getSelf(); auto inputTy = dyn_cast(input.getType()); if (!inputTy || !inputTy.hasSizes()) { return rewriter.notifyMatchFailure(op, "Expected input type having sizes"); } // For every dimension included in `dim` of the op, iterated over in // reverse order, we create a call to aten.max.dim. std::sort(dims.rbegin(), dims.rend()); for (int64_t dimInt : dims) { int64_t inputRank = inputTy.getSizes().size(); dimInt = toPositiveDim(dimInt, inputRank); if (!isValidDim(dimInt, inputRank)) return rewriter.notifyMatchFailure(op, "dim is statically invalid"); Value dim = rewriter.create( loc, rewriter.getI64IntegerAttr(dimInt)); // The input to the next invocation of aten.max.dim is the output of the // previous aten.max.dim op. 
input = createMaxAlongDimension(rewriter, loc, op, input, dim, keepDim); } rewriter.replaceOp(op, input); return success(); } }; } // end namespace namespace { class DecomposeAtenTriuOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenTriuOp op, PatternRewriter &rewriter) const override { MLIRContext *context = op.getContext(); Location loc = op.getLoc(); Value input = op.getSelf(); auto inputType = cast(input.getType()); if (!inputType.hasSizes() || !inputType.hasDtype()) { return rewriter.notifyMatchFailure(op, "should have shape and dtype"); } if (inputType.getSizes().size() < 2) { return rewriter.notifyMatchFailure(op, "the rank of tensor should >= 2"); } auto baseType = ValueTensorType::getWithLeastStaticInformation(context); Value cstZero = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value cstOne = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value none = rewriter.create(loc); Value rowDim = rewriter.create( loc, rewriter.getI64IntegerAttr(-2)); Value colDim = rewriter.create( loc, rewriter.getI64IntegerAttr(-1)); Value rowSize = rewriter.create(loc, input, rowDim); Value colSize = rewriter.create(loc, input, colDim); Value rowArange = rewriter.create( loc, baseType, rowSize, /*dtype=*/none, /*layout=*/none, /*device=*/none, /*pin_memory=*/none); Value colArange = rewriter.create( loc, baseType, colSize, /*dtype=*/none, /*layout=*/none, /*device=*/none, /*pin_memory=*/none); Value unsqueezeRowArange = rewriter.create(loc, baseType, rowArange, cstOne); Value unsqueezeColArange = rewriter.create(loc, baseType, colArange, cstZero); Value unsqueezeRowArangePlusDiagonal = rewriter.create( loc, baseType, unsqueezeRowArange, op.getDiagonal(), cstOne); Value condTensor = rewriter.create( loc, baseType, unsqueezeColArange, unsqueezeRowArangePlusDiagonal); rewriter.replaceOpWithNewOp( op, op.getResult().getType(), condTensor, input, cstZero); return success(); } }; } // namespace namespace 
{ class DecomposeAtenSizeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSizeOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); MLIRContext *context = op.getContext(); std::optional maybeRank = getTensorRank(self); if (!maybeRank) return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor"); unsigned rank = *maybeRank; SmallVector sizes; for (unsigned i = 0; i < rank; i++) { Value dim = rewriter.create( loc, rewriter.getI64IntegerAttr(i)); sizes.push_back(rewriter.create(loc, self, dim)); } Value sizeList = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(context)), sizes); rewriter.replaceOp(op, sizeList); return success(); } }; } // namespace namespace { class DecomposeAtenSelectIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSelectIntOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value start = op.getIndex(); Value dim = op.getDim(); Value self = op.getSelf(); auto resultTy = cast(op.getType()); if (!resultTy.hasSizes() || !resultTy.hasDtype()) { return rewriter.notifyMatchFailure( op, "expected result type to have sizes and dtype"); } // convert `start` to non-negative: start += int(start < 0) * dimSize Value zero = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value isNegative = rewriter.create(loc, start, zero); isNegative = rewriter.create(loc, isNegative); Value dimSize = rewriter.create(loc, self, dim); Value indexOffset = rewriter.create(loc, isNegative, dimSize); start = rewriter.create(loc, start, indexOffset); Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value startPlusOne = rewriter.create(loc, one.getType(), start, one); Value slice = rewriter.create( loc, computeReductionType(rewriter, op, cast(self.getType()), dim, /*keepDim=*/true), op.getSelf(), dim, start, startPlusOne, 
/*step=*/one); auto sliceTy = cast(slice.getType()); if (sliceTy.getSizes().size() == resultTy.getSizes().size()) { rewriter.replaceOp(op, slice); return success(); } // `aten.slice.tensor` doesn't squeeze the dim even when it's size 1 after // slicing, while `aten.select.int` does. rewriter.replaceOpWithNewOp(op, op.getResult().getType(), slice, op.getDim()); return success(); } }; } // namespace namespace { class DecomposePrimTolistOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(PrimTolistOp op, PatternRewriter &rewriter) const override { auto loc = op.getLoc(); auto self = op.getOperands()[0]; auto selfTy = dyn_cast(self.getType()); if (!selfTy || !selfTy.hasSizes()) return rewriter.notifyMatchFailure(op, "Unknown self shape"); int64_t rank = selfTy.getSizes().size(); if (rank != 1) return rewriter.notifyMatchFailure(op, "Expected rank-1"); int64_t length = selfTy.getSizes().back(); if (length == Torch::kUnknownSize) return rewriter.notifyMatchFailure(op, "Tolist length is unknown"); auto resultTy = dyn_cast(op.getType(0)); if (!resultTy) return rewriter.notifyMatchFailure(op, "Result type is not list"); auto scalarTy = resultTy.getContainedType(); Value zero = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); auto extractTy = rewriter.getType( llvm::SmallVector{1}, selfTy.getOptionalDtype()); llvm::SmallVector results; llvm::SmallVector sizes(selfTy.getSizes()); for (int64_t i = 0; i < length; ++i) { Value iv = rewriter.create(loc, rewriter.getI64IntegerAttr(i)); Value extract = rewriter.create( loc, extractTy, self, /*dim=*/zero, /*index=*/iv); Value scalar = rewriter.create(loc, scalarTy, extract); results.push_back(scalar); } rewriter.replaceOpWithNewOp(op, resultTy, results); return failure(); } }; } // namespace namespace { class DecomposeAtenSplitSizesOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSplitSizesOp op, PatternRewriter 
&rewriter) const override { rewriter.replaceOpWithNewOp( op, op->getResultTypes(), op.getSelf(), op.getSplitSize(), op.getDim()); return success(); } }; } // namespace namespace { class DecomposeAtenSplitWithSizesOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSplitWithSizesOp op, PatternRewriter &rewriter) const override { auto loc = op.getLoc(); Value self = op.getSelf(); SmallVector splitSizes; if (!getListConstructElements(op.getSplitSizes(), splitSizes)) return rewriter.notifyMatchFailure(op, "Unable to get sizes"); if (splitSizes.empty()) return rewriter.notifyMatchFailure(op, "No split sizes"); auto selfTy = dyn_cast(self.getType()); if (!selfTy || !selfTy.hasSizes()) return rewriter.notifyMatchFailure(op, "Self shape unknown"); int64_t rank = selfTy.getSizes().size(); auto resultTy = dyn_cast(op.getResult().getType()); if (!resultTy) return rewriter.notifyMatchFailure(op, "Result type not a list"); auto sliceTy = dyn_cast_or_null(resultTy.getContainedType()); if (!isa(sliceTy)) return rewriter.notifyMatchFailure(op, "Slice type is unknown"); int64_t dimInt = 0; bool hasDim = matchPattern(op.getDim(), m_TorchConstantInt(&dimInt)); if (dimInt < 0) dimInt += rank; auto intTy = rewriter.getType(); Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value begin = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); llvm::SmallVector slices; llvm::SmallVector sliceSizes(sliceTy.getSizes()); int64_t defaultLength = !hasDim ? 
Torch::kUnknownSize : sliceSizes[dimInt]; for (auto size : splitSizes) { Value end = rewriter.create(loc, intTy, begin, size); int64_t sizeInt; if (hasDim && matchPattern(size, m_TorchConstantInt(&sizeInt))) { sliceSizes[dimInt] = sizeInt; } else if (hasDim) { sliceSizes[dimInt] = defaultLength; } sliceTy = rewriter.getType(sliceSizes, sliceTy.getOptionalDtype()); Value slice = rewriter.create( loc, sliceTy, op.getSelf(), /*dim=*/op.getDim(), /*start=*/begin, /*end=*/end, /*step=*/one); slices.push_back(slice); begin = end; } rewriter.replaceOpWithNewOp(op, resultTy, slices); return success(); } }; } // namespace namespace { class DecomposeAtenNarrowOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenNarrowOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value start = op.getStart(); Value dim = op.getDim(); Value length = op.getLength(); Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value startPlusLength = rewriter.create(loc, one.getType(), start, length); rewriter.replaceOpWithNewOp( op, op.getResult().getType(), op.getSelf(), /*dim=*/dim, /*start=*/start, /*end=*/startPlusLength, /*step=*/one); return success(); } }; } // namespace namespace { // Decompose `aten.narrow.Tensor` to `aten.narrow` op class DecomposeAtenNarrowTensorOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenNarrowTensorOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); auto *context = op.getContext(); // PyTorch makes sure that `start` param is an 0-dim integral tensor. // REF: https://pytorch.org/docs/stable/generated/torch.narrow.html. 
auto start = rewriter.create( loc, Torch::IntType::get(context), op.getStart()); rewriter.replaceOpWithNewOp( op, op.getType(), op.getSelf(), op.getDim(), start, op.getLength()); return success(); } }; } // namespace namespace { class DecomposeAtenGluOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenGluOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); Value dim = op.getDim(); auto outputTy = dyn_cast(op.getType()); if (!outputTy || !outputTy.hasSizes() || !outputTy.hasDtype()) { return rewriter.notifyMatchFailure( op, "Expected output type having sizes and dtype"); } Value zero = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value dimSize = rewriter.create(loc, self, dim); Value two = rewriter.create(loc, rewriter.getI64IntegerAttr(2)); Value remainder = rewriter.create(loc, dimSize, two); Value eqOrNot = rewriter.create(loc, remainder, zero); rewriter.create( loc, eqOrNot, rewriter.getStringAttr("AtenGluOp's dim size must be multiple of 2")); Value splitLength = rewriter.create(loc, dimSize, two); Value a = rewriter.create(loc, outputTy, self, dim, zero, splitLength); Value b = rewriter.create(loc, outputTy, self, dim, splitLength, splitLength); // a⊗σ(b) Value sigmoidB = rewriter.create(loc, outputTy, b); Value result = rewriter.create(loc, outputTy, a, sigmoidB); rewriter.replaceOp(op, result); return success(); } }; } // namespace namespace { class DecomposeAtenZeroOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenZeroOp op, PatternRewriter &rewriter) const override { Value zero = rewriter.create(op.getLoc(), rewriter.getI64IntegerAttr(0)); rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), zero); return success(); } }; } // namespace namespace { class DecomposeAtenEyeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult 
matchAndRewrite(AtenEyeOp op, PatternRewriter &rewriter) const override { Value n = op.getN(); Value m = op.getN(); rewriter.replaceOpWithNewOp(op, op.getType(), n, m, op.getDtype(), op.getLayout(), op.getDevice(), op.getPinMemory()); return success(); } }; } // namespace namespace { class DecomposeAtenEyeMOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenEyeMOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); int64_t n; if (!matchPattern(op.getN(), m_TorchConstantInt(&n))) return rewriter.notifyMatchFailure(op, "unimplemented: n must be constant"); int64_t m; if (!matchPattern(op.getM(), m_TorchConstantInt(&m))) return rewriter.notifyMatchFailure(op, "unimplemented: m must be constant"); Value none = rewriter.create(loc); auto outType = dyn_cast(op.getType()); if (!outType) return rewriter.notifyMatchFailure( op, "Only tensor types input are currently supported"); if (!outType.hasDtype()) { return rewriter.notifyMatchFailure(op, "result should have dtype"); } if (n < 0) { return rewriter.notifyMatchFailure(op, "n must be greater or equal to 0"); } if (m < 0) { return rewriter.notifyMatchFailure(op, "m must be greater or equal to 0"); } auto context = op.getContext(); auto int64Dtype = getDtypeIntValueForType( rewriter, loc, rewriter.getIntegerType(/*width=*/64, /*isSigned=*/true)); auto si64Type = IntegerType::get(context, 64, IntegerType::Signed); auto arangeType = outType.getWithSizesAndDtype(llvm::ArrayRef(n), si64Type); Value rangeN = rewriter.create( loc, arangeType, op.getN(), /*dtype=*/int64Dtype, /*layout=*/none, /*device=*/op.getDevice(), /*pin_memory=*/none); auto arangeType1 = outType.getWithSizesAndDtype(llvm::ArrayRef(m), si64Type); Value rangeM = rewriter.create( loc, arangeType1, op.getM(), /*dtype=*/int64Dtype, /*layout=*/none, /*device=*/none, /*pin_memory=*/none); Value constMinusOne = rewriter.create( loc, rewriter.getI64IntegerAttr(-1)); auto 
unsqzTensorInfo = unsqueezeTensor(rewriter, op, rangeN, /*dim=*/constMinusOne); if (failed(unsqzTensorInfo)) { return rewriter.notifyMatchFailure(op, "cannot generate unsqueeze tensor"); } Value unsqzRangeN = *unsqzTensorInfo; // compare unsqueezed input with boundaries auto eqType = ValueTensorType::get( context, cast(op.getType()).getSizes(), IntegerType::get(context, 1)); Value eqTensor = rewriter.create(loc, eqType, unsqzRangeN, rangeM); Value dtype = op.getDtype(); if (isa(dtype.getType())) { rewriter.replaceOp(op, eqTensor); return success(); } else { auto zero = rewriter.create(loc, rewriter.getF64FloatAttr(0.0)); auto one = rewriter.create(loc, rewriter.getF64FloatAttr(1.0)); Value outTensor = rewriter.create(loc, outType, eqTensor, one, zero); rewriter.replaceOp(op, outTensor); return success(); } } }; } // namespace namespace { class DecomposeAtenIsnanOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenIsnanOp op, PatternRewriter &rewriter) const override { Value input = op.getSelf(); // Create a new aten.ne operation with the same type and input value. 
rewriter.replaceOpWithNewOp(op, op.getType(), input, input); return success(); } }; } // namespace namespace { class DecomposeAtenIsinfOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenIsinfOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); mlir::FloatType f64Type = rewriter.getF64Type(); Value inf = rewriter.create( loc, rewriter.getFloatAttr( f64Type, APFloat::getInf(f64Type.getFloatSemantics()))); Value abs = rewriter.create(loc, self.getType(), self); rewriter.replaceOpWithNewOp(op, op.getType(), abs, inf); return success(); } }; } // namespace namespace { class DecomposeAtenIsneginfOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenIsneginfOp op, PatternRewriter &rewriter) const override { mlir::FloatType f64Type = rewriter.getF64Type(); Value inf = rewriter.create( op.getLoc(), rewriter.getFloatAttr( f64Type, APFloat::getInf(f64Type.getFloatSemantics(), true))); rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), inf); return success(); } }; } // namespace namespace { class DecomposeAtenIsposinfOp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenIsposinfOp op, PatternRewriter &rewriter) const override { mlir::FloatType f64Type = rewriter.getF64Type(); Value inf = rewriter.create( op.getLoc(), rewriter.getFloatAttr(f64Type, APFloat::getInf(f64Type.getFloatSemantics()))); rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), inf); return success(); } }; } // namespace namespace { class DecomposeAtenReshapeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenReshapeOp op, PatternRewriter &rewriter) const override { Value input = op.getSelf(); // TODO: Handle non value tensor type operands. 
if (!isa(input.getType())) { return rewriter.notifyMatchFailure( op, "unimplemented: only value tensor type operands are supported"); } rewriter.replaceOpWithNewOp(op, op.getType(), input, op.getShape()); return success(); } }; } // namespace namespace { // Decompose AtenEinsumOp to AtenMatmulOp, and supports possible reduce // operation and permute operation. Currently, this pass doesn't support // Hadamard product. The basic idea is that: // Step 1: split the string equation to input/result tokens and find // batchingDims, contractingDims, otherDims and reduceDims. // Step 2: permute and reshape input tensors suitable // for matmul operations. // Step 3: use AtenMatmulOp to get the result. // Step 4: iteratively execute step 2 & 3 until we get the final result. // Step 5: perform remaining permute and reduce operations. // notice: support static shape only class DecomposeAtenEinsumOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenEinsumOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); SmallVector inputTensors; if (!getListConstructElements(op.getTensors(), inputTensors)) { return rewriter.notifyMatchFailure( op, "input should comes from a PrimListConstructOp"); } auto allTensorHasSizes = [](Value tensor) { auto type = dyn_cast(tensor.getType()); if (!type || !type.hasSizes()) return false; return true; }; if (!llvm::all_of(inputTensors, allTensorHasSizes)) { return rewriter.notifyMatchFailure(op, "all input tensors should have sizes"); } std::string equation; if (!matchPattern(op.getEquation(), m_TorchConstantStr(equation))) { return rewriter.notifyMatchFailure(op, "Unsupported value of equation"); } // if "..." 
in equation, modify it if (equation.find("...") != std::string::npos) { SmallVector inputRanks; for (Value tensor : inputTensors) { auto type = cast(tensor.getType()); inputRanks.push_back(type.getSizes().size()); } if (!rewriteEquationWithEllipsisSlicing(equation, inputRanks)) { return rewriter.notifyMatchFailure( op, "Unexpected character in equations encountered"); } } SmallVector resultTokens; SmallVector> inputTokens; if (!parseEquation(equation, inputTokens, resultTokens)) { return rewriter.notifyMatchFailure( op, "Unexpected character in equations encountered"); } SmallVector lhsTokens = inputTokens[0]; Value lhs = inputTensors[0]; Value result; for (size_t i = 1; i < inputTensors.size(); ++i) { auto rhs = inputTensors[i]; auto rhsTokens = inputTokens[i]; SmallVector outTokens; if (failed(performMatmul(rewriter, loc, lhs, lhsTokens, rhs, rhsTokens, result, outTokens, resultTokens))) { return failure(); } lhs = result; lhsTokens = outTokens; } result = performLastReduceAndPermute(rewriter, loc, op.getType(), lhs, lhsTokens, resultTokens); rewriter.replaceOp(op, result); return success(); } }; } // namespace namespace { // Calculate the trace of the input tensor as the sum over its diagonal // elements. This computation is performed as: // // Step1: Obtain the diagonal using AtenDiagonalOp // Step2: Compute the trace using AtenSumOp. // // It is verified that the input tensor has rank two. 
class DecomposeAtenTraceOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  // Rewrites the trace of a rank-2 tensor as: take the main diagonal
  // (offset 0 over dims 0 and 1), then reduce it with a sum whose `dtype`
  // operand is `none`. Any other (or unknown) rank is a match failure.
  //
  // NOTE(review): the template arguments of the `rewriter.create` / `cast`
  // calls appear to be missing in this part of the file; they are kept as
  // found.
  LogicalResult matchAndRewrite(AtenTraceOp op,
                                PatternRewriter &rewriter) const override {
    Value self = op.getSelf();
    auto rank = getTensorRank(self);
    if (rank != 2)
      return rewriter.notifyMatchFailure(
          op, "Expected input tensor to have rank 2.");

    Location loc = op.getLoc();
    Value cstNone = rewriter.create(loc);
    Value cstZero = rewriter.create(loc, rewriter.getI64IntegerAttr(0));
    Value cstOne = rewriter.create(loc, rewriter.getI64IntegerAttr(1));

    BaseTensorType selfType = cast(self.getType());
    BaseTensorType traceType = cast(op.getResult().getType());

    // The diagonal is as long as the smaller of the two extents.
    auto selfShape = selfType.getSizes();
    int64_t diagonalLength = std::min(selfShape[0], selfShape[1]);
    Type diagonalType = selfType.getWithSizesAndDtype(
        llvm::ArrayRef(diagonalLength), selfType.getOptionalDtype());

    Value diagonal = rewriter.create(loc, diagonalType, /*input=*/self,
                                     /*offset=*/cstZero, /*dim1=*/cstZero,
                                     /*dim2=*/cstOne);
    Value trace = rewriter.create(loc, traceType, /*self=*/diagonal,
                                  /*dtype=*/cstNone);
    rewriter.replaceOp(op, trace);
    return success();
  }
};
} // namespace

// Calculates the softmax function on the given `input` tensor. Softmax(x) =
// exp(x)/sum(exp(x)).
// To avoid overflow we use the following decomposition rule: // x_max = max(input, dim, keepdim = True) // unnorm = aten.exp(input - x_max) // softmax = unnorm / sum(unnorm, dim, keepdim = True) template static Value getSoftmaxResult(OpTy op, Value self, Type resultType, Type accumulatorType, PatternRewriter &rewriter) { Location loc = op.getLoc(); Value dim = op.getDim(); if (resultType != accumulatorType) self = convertTensorToDtype(rewriter, loc, self, accumulatorType); Value xMax = createMaxAlongDimension(rewriter, loc, op, self, dim, /*keepDim=*/true); if (!xMax) return nullptr; Value unNormalized = createTensorSub(rewriter, loc, self.getType(), self, xMax); Value unNormalizedExp = rewriter.create(loc, self.getType(), unNormalized); Value sum = createSumAlongDimension(rewriter, loc, op, unNormalizedExp, dim, /*keepDim=*/true); if (!sum) return nullptr; Value result = rewriter.create(loc, self.getType(), unNormalizedExp, sum); if (resultType != accumulatorType) result = convertTensorToDtype(rewriter, loc, result, cast(resultType).getDtype()); return result; } // Decompose softmax into: exp(x) / sum(exp(x)) namespace { class DecomposeAtenSoftmaxIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSoftmaxIntOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); BaseTensorType resultTensorType = cast(op.getType()); if (!resultTensorType.hasDtype()) { return rewriter.notifyMatchFailure( op, "expected result type to have a dtype"); } Type resultTensorDtype = resultTensorType.getDtype(); if (!isa(resultTensorDtype)) return rewriter.notifyMatchFailure(op, "Only support floating-point type"); // If `dtype` arg is non-none then convert the input to `dtype`. 
if (!isa(op.getDtype().getType())) { Location loc = op.getLoc(); Value none = rewriter.create(loc); Value cstFalse = rewriter.create(loc, false); self = rewriter.create( loc, resultTensorType, self, getDtypeIntValueForType(rewriter, loc, resultTensorDtype), /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/none); } Type accumulatorTensorType = getDefaultAccType(rewriter, resultTensorDtype); Value result = getSoftmaxResult(op, self, resultTensorType, accumulatorTensorType, rewriter); if (!result) return failure(); rewriter.replaceOpWithNewOp(op, op.getType(), result); return success(); } }; } // namespace namespace { class DecomposeAten_SoftmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_SoftmaxOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); BaseTensorType tensorType = cast(self.getType()); if (!tensorType.hasDtype() || !isa(tensorType.getDtype())) return rewriter.notifyMatchFailure(op, "Only support floating type"); bool halfToFloat; if (!matchPattern(op.getHalfToFloat(), m_TorchConstantBool(&halfToFloat))) return rewriter.notifyMatchFailure( op, "Expected a boolean value for half_to_float"); BaseTensorType resultTensorType = cast(op.getType()); if (!resultTensorType.hasDtype()) { return rewriter.notifyMatchFailure( op, "expected result type to have a dtype"); } Type resultTensorDtype = resultTensorType.getDtype(); // `torch.ops.aten._softmax`'s softmax with half to float conversion is not // supported on CPU, but we go ahead with the decomposing. // TODO: Add an e2e test once upstream support is added. // If `half_to_float` is set, we convert the input's elemental type to match // that of output's. 
if (halfToFloat) { Location loc = op.getLoc(); Value none = rewriter.create(loc); Value cstFalse = rewriter.create(loc, false); self = rewriter.create( loc, resultTensorType, self, getDtypeIntValueForType(rewriter, loc, resultTensorDtype), /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/none); } Type accumulatorTensorType = getDefaultAccType(rewriter, resultTensorDtype); Value result = getSoftmaxResult(op, self, resultTensorType, accumulatorTensorType, rewriter); if (!result) return op.emitError("failed to get softmax result"); rewriter.replaceOpWithNewOp(op, resultTensorType, result); return success(); } }; } // namespace // Aten_SoftmaxBackwardDataOp(gradOutput, output, dim) => // newGrad = gradOutput * output // result = newGrad - output * sum(newGrad, dim)) // // Refer to // https://github.com/pytorch/pytorch/blob/15fecc4c830a3907fde4b44c9962dc4144da50a4/torch/csrc/jit/codegen/cuda/ops/normalization.cpp#L31 namespace { class DecomposeAten_SoftmaxBackwardDataOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_SoftmaxBackwardDataOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); Value output = op.getOutput(); Value dim = op.getDim(); BaseTensorType tensorType = cast(gradOutput.getType()); if (!tensorType.hasDtype() || !isa(tensorType.getDtype())) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value newGrad = rewriter.create(loc, tensorType, gradOutput, output); Value result = createSoftmaxBackwardCommonKernel( rewriter, loc, op, tensorType, newGrad, output, newGrad, dim); if (!result) return rewriter.notifyMatchFailure( op, "nullptr returned by createSoftmaxBackwardCommonKernel function."); rewriter.replaceOp(op, result); return success(); } }; } // namespace // AtenTanhBackwardOp(gradOutput, output) => // result = gradOutput * (1 - output^2) // To get away from broadcasts the above formula is 
expanded i.e., // result = gradOutput - (gradOutput * output^2) namespace { class DecomposeAtenTanhBackwardOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenTanhBackwardOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); // `output` is the value flowing out from tanh. Hence, tanh(x) = output. // Since, dTanh(x) = (1 - tanh(x)^2) hence, dOutput = (1 - output^2). Value output = op.getOutput(); BaseTensorType tensorType = cast(gradOutput.getType()); if (!tensorType.hasDtype() || !isa(tensorType.getDtype())) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value tanhSquare = rewriter.create(loc, tensorType, output, output); Value gradMulTanhSquare = rewriter.create( loc, tensorType, tanhSquare, gradOutput); Value newGrad = createTensorSub(rewriter, loc, tensorType, gradOutput, gradMulTanhSquare); rewriter.replaceOp(op, newGrad); return success(); } }; } // namespace // Aten_LogSoftmaxBackwardDataOp(gradOutput, output, dim) => // result = gradOutput - (exp(output) * sum(gradOutput, dim)) namespace { class DecomposeAten_LogSoftmaxBackwardDataOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_LogSoftmaxBackwardDataOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); Value output = op.getOutput(); Value dim = op.getDim(); BaseTensorType tensorType = cast(gradOutput.getType()); if (!tensorType.hasDtype() || !isa(tensorType.getDtype())) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value expOut = rewriter.create(loc, tensorType, output); Value result = createSoftmaxBackwardCommonKernel( rewriter, loc, op, tensorType, gradOutput, expOut, gradOutput, dim); if (!result) return rewriter.notifyMatchFailure( op, "nullptr returned by createSoftmaxBackwardCommonKernel function."); 
rewriter.replaceOp(op, result); return success(); } }; } // namespace namespace { class DecomposeAtenAMinMaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Torch::AtenAminOp op, PatternRewriter &rewriter) const override { llvm::SmallVector dimList; if (!matchPattern(op.getDim(), m_TorchListOfConstantInts(dimList))) { return rewriter.notifyMatchFailure(op, "dims not foldable constants"); } bool keepdim; if (!matchPattern(op.getKeepdim(), m_TorchConstantBool(&keepdim))) { return rewriter.notifyMatchFailure(op, "keepdims not foldable constants"); } auto loc = op.getLoc(); std::sort(dimList.begin(), dimList.end(), std::greater()); Value reduction = op.getSelf(); auto resultTy = cast(op.getType()); auto reductionTy = cast(reduction.getType()); llvm::SmallVector reductionShape(reductionTy.getSizes()); for (auto dim : dimList) { auto dimValue = rewriter.create( loc, rewriter.getI64IntegerAttr(dim)); reductionShape[dim] = 1; if (!keepdim) { for (int i = dim, s = reductionShape.size() - 1; i < s; ++i) reductionShape[i] = reductionShape[i + 1]; reductionShape.resize(reductionShape.size() - 1); } reductionTy = rewriter.getType( reductionShape, resultTy.getOptionalDtype()); auto idxTy = rewriter.getType( reductionShape, rewriter.getIntegerType(32, /*is_signed*/ true)); llvm::SmallVector types{reductionTy, idxTy}; reduction = rewriter .create(loc, types, reduction, dimValue, op.getKeepdim()) .getResult(0); } rewriter.replaceOp(op, reduction); return success(); } }; } // namespace // Decompose `AtenArgMaxOp` into `AtenMaxDimOp` as well as `AtenArgMinOp` into // `AtenMinDimOp` namespace { template class DecomposeAtenArgMinMaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getSelf(); Value dim = op.getDim(); Value keepDim = op.getKeepdim(); Value result = 
op.getResult(); BaseTensorType inputType = cast(input.getType()); BaseTensorType indicesTensorType = cast(result.getType()); std::optional maybeInputRank = getTensorRank(input); if (!maybeInputRank) { return rewriter.notifyMatchFailure( op, "expected input tensor to have a rank"); } unsigned inputRank = *maybeInputRank; if (!indicesTensorType.hasSizes()) return failure(); BaseTensorType valueTensorType = inputType .getWithSizesAndDtype(indicesTensorType.getOptionalSizes(), inputType.getOptionalDtype()) .cast(); // If the dim type is `NoneType` i.e. reduce along all the dimensions. // `AtenMaxDimOp` and `AtenMinDimOp` do not support dim as `NoneType` so // first the input tensor is flattened to 1d tensor and then the reduction // happens on the 0th dimension. if (isa(dim.getType())) { BaseTensorType flattenType = inputType .getWithSizesAndDtype({kUnknownSize}, inputType.getOptionalDtype()) .cast(); dim = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value end = rewriter.create( loc, rewriter.getI64IntegerAttr(inputRank - 1)); input = rewriter.create(loc, flattenType, input, dim, end); } Value resultArg = rewriter .create(loc, valueTensorType, indicesTensorType, input, dim, keepDim) .getIndices(); rewriter.replaceOp(op, resultArg); return success(); } }; } // namespace // Decompose `aten.bucketize` into the following op sequence: // // def aten_bucketize(input, boundaries, out_int32, right): // unsqz_input = input.unsqueeze(-1) // if not right: // comparison = unsqz_input <= boundaries // else: // comparison = unsqz_input < boundaries // indices = torch.argmax(comparison.float(), dim=-1) // within_bound = comparison[..., -1] // result = torch.where(within_bound, indices, boundaries.shape[0]) // if out_int32: // result = result.int() // return result // namespace { class DecomposeAtenBucketizeTensorOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenBucketizeTensorOp op, PatternRewriter &rewriter) 
const override { Location loc = op.getLoc(); Value input = op.getSelf(); auto inputType = cast(input.getType()); if (!inputType.hasSizes()) { return rewriter.notifyMatchFailure( op, "unimplemented: input must have known sizes"); } ArrayRef inputShape = inputType.getSizes(); Value boundaries = op.getBoundaries(); auto boundariesType = cast(boundaries.getType()); if (!boundariesType.hasSizes() || boundariesType.getSizes().size() != 1) { return rewriter.notifyMatchFailure(op, "unimplemented: boundaries must have " "known sizes and must be a 1D array"); } int64_t boundariesSize = boundariesType.getSizes()[0]; bool outInt32; if (!matchPattern(op.getOutInt32(), m_TorchConstantBool(&outInt32))) { return rewriter.notifyMatchFailure( op, "unimplemented: out_int32 must be a constant bool"); } bool right; if (!matchPattern(op.getRight(), m_TorchConstantBool(&right))) { return rewriter.notifyMatchFailure( op, "unimplemented: right must be a constant bool"); } // unsqueeze input at the last dim to make it broadcastable with boundaries Value constMinusOne = rewriter.create( loc, rewriter.getI64IntegerAttr(-1)); auto unsqzTensorInfo = unsqueezeTensor(rewriter, op, input, /*dim=*/constMinusOne); if (failed(unsqzTensorInfo)) { return rewriter.notifyMatchFailure(op, "cannot generate unsqueeze tensor"); } Value unsqzInput = *unsqzTensorInfo; // compare unsqueezed input with boundaries SmallVector compareShape(inputShape); compareShape.push_back(boundariesSize); Type compareType = inputType.getWithSizesAndDtype(compareShape, rewriter.getI1Type()); Value compare; if (!right) { compare = rewriter.create(loc, compareType, unsqzInput, boundaries); } else { compare = rewriter.create(loc, compareType, unsqzInput, boundaries); } // convert the comparison results to float32 as the argmax op input, // which does not support integer dtype in LINALG backend Value compareF32 = convertTensorToDtype(rewriter, loc, compare, rewriter.getF32Type()); // get the first boundary index where the input 
element is less than (or // equal to) the boundary value Type indicesType = inputType.getWithSizesAndDtype( inputShape, rewriter.getIntegerType(64, IntegerType::Signed)); Value constFalse = rewriter.create(loc, false); Value indices = rewriter.create(loc, indicesType, compareF32, /*dim=*/constMinusOne, /*keepdim=*/constFalse); // get the comparison results between each input element and the rightmost // boundary value Type withinUpperBoundType = inputType.getWithSizesAndDtype(inputShape, rewriter.getI1Type()); Value withinUpperBound = rewriter.create( loc, withinUpperBoundType, compare, /*dim=*/constMinusOne, /*index=*/constMinusOne); // If the input element is less than (or equal to) the rightmost boundary, // take the max index as result. Otherwise, the element is beyond the // rightmost boundary, so take the boundary size. Value constZero = rewriter.create( loc, rewriter.getI64IntegerAttr(0)); Value upperBound = rewriter.create(loc, boundaries, /*dim=*/constZero); Value result = rewriter.create( loc, indicesType, withinUpperBound, indices, upperBound); if (outInt32) { result = convertTensorToDtype( rewriter, loc, result, rewriter.getIntegerType(32, IntegerType::Signed)); } rewriter.replaceOp(op, result); return success(); } }; } // namespace // To avoid overflow we use the following decomposition rule: // x_max = aten.max(x, dim, keepdim=True)[0] // shifted = x - x_max // shifted_logsumexp = aten.log(aten.sum(aten.exp(shifted), dim, keepdim=True)) // log_softmax = shifted - shifted_logsumexp template static Value getLogSoftmaxResult(OpTy op, PatternRewriter &rewriter) { Location loc = op.getLoc(); Value dim = op.getDim(); Value self = op.getSelf(); BaseTensorType tensorType = cast(self.getType()); Value xMax = createMaxAlongDimension(rewriter, loc, op, self, dim, /*keepDim=*/true); if (!xMax) return nullptr; Value shifted = createTensorSub(rewriter, loc, tensorType, self, xMax); Value shiftedExp = rewriter.create(loc, tensorType, shifted); Value shiftedSumExp = 
createSumAlongDimension(rewriter, loc, op, shiftedExp, dim, /*keepDim=*/true); if (!shiftedSumExp) return nullptr; Value shiftedLogSumExp = rewriter.create(loc, shiftedSumExp.getType(), shiftedSumExp); Value result = createTensorSub(rewriter, loc, op.getType(), shifted, shiftedLogSumExp); return result; } namespace { class DecomposeAtenLogSoftmaxIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenLogSoftmaxIntOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); if (!isa(op.getDtype().getType())) return rewriter.notifyMatchFailure( op, "Unimplemented non-None dtype for log_softmax"); BaseTensorType tensorType = cast(self.getType()); if (!tensorType.hasDtype() || !isa(tensorType.getDtype())) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value logSoftmax = getLogSoftmaxResult(op, rewriter); if (!logSoftmax) return rewriter.notifyMatchFailure( op, "getLogSoftmaxResult function returned nullptr"); rewriter.replaceOp(op, logSoftmax); return success(); } }; } // namespace namespace { class DecomposeAten_LogSoftmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_LogSoftmaxOp op, PatternRewriter &rewriter) const override { bool halfToFloat; if (!matchPattern(op.getHalfToFloat(), m_TorchConstantBool(&halfToFloat))) return rewriter.notifyMatchFailure( op, "Expected a boolean value for half_to_float"); // Currently, setting `halfToFloat` is not supported as the E2E testing for // the same is not present on CPU. 
if (halfToFloat) return rewriter.notifyMatchFailure( op, "halfToFloat is currently not supported."); Value _logSoftmax = getLogSoftmaxResult(op, rewriter); if (!_logSoftmax) return rewriter.notifyMatchFailure( op, "getLogSoftmaxResult function returned nullptr"); rewriter.replaceOp(op, _logSoftmax); return success(); } }; } // namespace namespace { class DecomposeAtenLogSigmoidOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenLogSigmoidOp op, PatternRewriter &rewriter) const override { Value sigmoid = rewriter.create(op.getLoc(), op.getType(), op.getSelf()); rewriter.replaceOpWithNewOp(op, op.getType(), sigmoid); return success(); } }; } // namespace // Decompose aten.matmul into: aten.mm and aten.bmm according to ranks. namespace { class DecomposeAtenMatmulOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMatmulOp op, PatternRewriter &rewriter) const override { Value lhs = op.getSelf(); Value rhs = op.getOther(); std::optional maybeLhsRank = getTensorRank(lhs); std::optional maybeRhsRank = getTensorRank(rhs); if (!maybeLhsRank || !maybeRhsRank) { return rewriter.notifyMatchFailure( op, "expected input tensors to have a rank"); } unsigned lhsRank = *maybeLhsRank; unsigned rhsRank = *maybeRhsRank; if (lhsRank == 2 && rhsRank == 2) { // If both lhs and rhs ranks are 2 then map it to `aten.mm` op. rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs); } else if (lhsRank == 3 && rhsRank == 3) { // If both lhs and rhs ranks are 3 then map it to `aten.bmm` op. rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs); } else { return failure(); } return success(); } }; } // namespace // Decompose aten.mv into: aten.matmul. 
namespace { class DecomposeAtenMvOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMvOp op, PatternRewriter &rewriter) const override { Value lhs = op.getSelf(); Value rhs = op.getVec(); rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs); return success(); } }; } // namespace // Decompose aten.linalg_cross into: aten.broadcast_to, aten.index_select, // aten.add.Tensor and aten.mull.Tensor. See // https://github.com/pytorch/pytorch/blob/ed3c256b61f05720843454a9282aa7c903da2c81/torch/_refs/linalg/__init__.py#L70. // def linalg_cross(self: Tensor, other: Tensor, dim: int = -1): // broadcast_shape = compute_broadcast_shape(self, other) // a = torch.broadcast_to(self, broadcast_shape) // b = torch.broadcast_to(other, broadcast_shape) // idx = torch.arange(3) // return a.index_select(dim, (idx + 1) % 3) * // b.index_select(dim, (idx + 2) % 3) - // a.index_select(dim, (idx + 2) % 3) * // b.index_select(dim, (idx + 1) % 3) namespace { class DecomposeAtenLinalgCrossOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenLinalgCrossOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); Value other = op.getOther(); Type opType = op.getType(); Value dim = op.getDim(); auto resType = cast(self.getType()); if (!resType.hasDtype()) { return rewriter.notifyMatchFailure(op, "result should have dtype"); } Type dtype = resType.getDtype(); if (isa(dtype)) { return rewriter.notifyMatchFailure( op, "lowering of aten.linalg_cross for complex inputs dtype is " "currently unimplemented"); } // calculate common shape for broadcast SmallVector broadcastShape; SmallVector broadcastShapeValue; computeBroadcastShape(rewriter, loc, self, other, broadcastShape, broadcastShapeValue); Type broadcastType = ValueTensorType::get( op.getContext(), llvm::ArrayRef(broadcastShape), dtype); Value indexBroadcastShapeTorchList = 
rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(op.getContext())), broadcastShapeValue); // broadcast tensors to common shape auto a = rewriter.create(loc, broadcastType, self, indexBroadcastShapeTorchList); auto b = rewriter.create(loc, broadcastType, other, indexBroadcastShapeTorchList); // create constants Value constOne = rewriter.create( loc, rewriter.getI64IntegerAttr(1)); Value constTwo = rewriter.create( loc, rewriter.getI64IntegerAttr(2)); Value constThree = rewriter.create( loc, rewriter.getI64IntegerAttr(3)); Value none = rewriter.create(loc); // idx = torch.arange(3) auto outType = dyn_cast(opType); auto arangeType = outType.getWithSizesAndDtype( llvm::ArrayRef(3), IntegerType::get(op.getContext(), 64, IntegerType::Signed)); auto idx = rewriter.create( loc, arangeType, constThree, /*dtype=*/none, /*layout=*/none, /*device=*/none, /*pin_memory=*/none); // (idx + 1) and (idx + 2) auto idxPlusOne = rewriter.create(loc, arangeType, idx, constOne, constOne); auto idxPlusTwo = rewriter.create(loc, arangeType, idx, constTwo, constOne); // (idx + 1) % 3 and (idx + 2) % 3 auto idxPlusOneRemainderThree = rewriter.create( loc, arangeType, idxPlusOne, constThree); auto idxPlusTwoRemainderThree = rewriter.create( loc, arangeType, idxPlusTwo, constThree); // a.index_select(dim, (idx + 1) % 3) * b.index_select(dim, (idx + 2) % 3) auto idxSelectAPlusOne = rewriter.create( loc, opType, a, dim, idxPlusOneRemainderThree); auto idxSelectBPlusTwo = rewriter.create( loc, opType, b, dim, idxPlusTwoRemainderThree); auto firstMul = rewriter.create( loc, opType, idxSelectAPlusOne, idxSelectBPlusTwo); // a.index_select(dim, (idx + 2) % 3) * b.index_select(dim, (idx + 1) % 3) auto idxSelectAPlusTwo = rewriter.create( loc, opType, a, dim, idxPlusTwoRemainderThree); auto idxSelectBPlusOne = rewriter.create( loc, opType, b, dim, idxPlusOneRemainderThree); auto secondMul = rewriter.create( loc, opType, idxSelectAPlusTwo, idxSelectBPlusOne); // subtract the results of 
the two multiplications from above rewriter.replaceOpWithNewOp(op, opType, firstMul, secondMul, constOne); return success(); } }; } // namespace // Decompose aten.pixel_shuffle into: prims.split_dim, aten.permute, and // prims.collapse operations. // // If input is a tensor of shape // (*leading_dims, C*r*r, H, W), // // where leading_dims is of size N, then // X = pixel_shuffle(input, upscale_factor) // // gets replaced with // X = input.split_dim(...) # shape (*leading_dims, C, r*r, H, W) // X = X.split_dim(...) # shape (*leading_dims, C, r, r, H, W) // X = X.permute(0, ..., N, N+3, N+1, N+4, N+2) // # shape (*leading_dims, C, H, r, W, r) // X = X.collapse(...) # shape (*leading_dims, C, r, H, r*W) // X = X.collapse(...) # shape (*leading_dims, C, r*H, r*W) // // 'r' above is referred to as the 'upscale factor' or just 'factor' below. namespace { class DecomposeAtenPixelShuffleOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenPixelShuffleOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value inValue = op.getSelf(); auto inType = cast(inValue.getType()); auto maybeSizes = inType.getOptionalSizes(); if (!maybeSizes) { return rewriter.notifyMatchFailure( op, "Expected input tensor to have known rank."); } auto inShape = maybeSizes.value(); auto inRank = inShape.size(); // The input tensor must have at least 3 dimensions: (1) the channel // dimension which gets smaller by 'factor*factor', (2) the H channel which // gets larger by 'factor' and (3) the W channel which get larger by // 'factor'. The total number of dimensions is 3 + N, where N is the number // of leading dimensions, and N >= 0 so the input must have rank at least 3. 
if (inRank < 3) return rewriter.notifyMatchFailure( op, "Expected input tensor to have rank greater than 2."); const auto inOptionalDType = inType.getOptionalDtype(); auto getTypeFromShape = [inOptionalDType](auto &&vals) { // Get a vector of integers from a vector of Values. auto getIntShape = [](auto &&vals) { SmallVector shape; shape.reserve(vals.size()); for (auto v : vals) { int64_t cst_val; if (matchPattern(v, m_TorchConstantInt(&cst_val))) { shape.push_back(cst_val); } else { shape.push_back(kUnknownSize); } } return shape; }; const auto intShape = getIntShape(vals); return ValueTensorType::get(vals[0].getContext(), llvm::ArrayRef(intShape), inOptionalDType); }; auto nLeadingDims = inRank - 3; // Get the size of the dimension 'i'. Note the use of 'createOrFold' instead // of 'create': if the dimension size is known, then the AtenSizeIntOp is // folded to a ConstantOp. auto getDimSize = [&](uint64_t i) -> Value { Value dim = rewriter.create(loc, rewriter.getI64IntegerAttr(i)); return rewriter.createOrFold(loc, inValue, dim); }; auto inC = getDimSize(inRank - 3); auto inH = getDimSize(inRank - 2); auto inW = getDimSize(inRank - 1); auto factor = op.getUpscaleFactor(); Value factorSquared = rewriter.createOrFold(loc, factor, factor); Value outC = rewriter.createOrFold(loc, inC, factorSquared); Value outH = rewriter.createOrFold(loc, inH, factor); Value outW = rewriter.createOrFold(loc, inW, factor); SmallVector dimensionConstants; dimensionConstants.reserve(inRank + 2); for (unsigned i = 0; i < inRank + 2; ++i) { dimensionConstants.push_back( rewriter.create(loc, rewriter.getI64IntegerAttr(i))); } SmallVector leadingDims; leadingDims.reserve(nLeadingDims); for (unsigned i = 0; i < nLeadingDims; ++i) { Value leadingDimSize = rewriter.createOrFold( loc, inValue, dimensionConstants[i]); leadingDims.push_back(leadingDimSize); } SmallVector partiallyExpandedShape = leadingDims; partiallyExpandedShape.append({outC, factorSquared, inH, inW}); SmallVector 
prePermuteShape = leadingDims; prePermuteShape.append({outC, factor, factor, inH, inW}); SmallVector postPermuteShape = leadingDims; postPermuteShape.append({outC, inH, factor, inW, factor}); SmallVector partiallyCollapsedShape = leadingDims; partiallyCollapsedShape.append({outC, inH, factor, outW}); SmallVector outShape = leadingDims; outShape.append({outC, outH, outW}); SmallVector permutation{dimensionConstants.begin(), dimensionConstants.begin() + nLeadingDims}; SmallVector permutationTail{0, 3, 1, 4, 2}; for (uint64_t d : permutationTail) { permutation.push_back(dimensionConstants[nLeadingDims + d]); } Value permuteDimsOrder = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(op->getContext())), permutation); // Split input channel inC -> (inC, factorSquared) auto partiallyExpanded = rewriter .create( loc, getTypeFromShape(partiallyExpandedShape), inValue, dimensionConstants[nLeadingDims], outC) .getResult(); // Split new dimension factorSquared -> (factor, factor) auto fullyExpanded = rewriter.create( loc, getTypeFromShape(prePermuteShape), partiallyExpanded, dimensionConstants[nLeadingDims + 1], factor); // Perform the permutation auto permuted = rewriter.create(loc, getTypeFromShape(postPermuteShape), fullyExpanded, permuteDimsOrder); // Collapse final 2 dimension auto partiallyCollapsed = rewriter.create( loc, getTypeFromShape(partiallyCollapsedShape), permuted, dimensionConstants[nLeadingDims + 3], dimensionConstants[nLeadingDims + 4]); // Collapse back to original rank rewriter.replaceOpWithNewOp( op, op.getType(), partiallyCollapsed, dimensionConstants[nLeadingDims + 1], dimensionConstants[nLeadingDims + 2]); return success(); } }; } // namespace // ReLU6(x) = min(max(0, x), 6) = min(Relu(x), 6) static Value getRelu6Results(PatternRewriter &rewriter, Location loc, Value input) { BaseTensorType inputType = cast(input.getType()); Value relu = rewriter.create