//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // Also available under a BSD-style license. See LICENSE. // //===----------------------------------------------------------------------===// #include "PassDetail.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "torch-mlir/Dialect/Torch/IR/TorchDialect.h" #include "torch-mlir/Dialect/Torch/IR/TorchOps.h" #include "torch-mlir/Dialect/Torch/IR/TorchTypes.h" #include "torch-mlir/Dialect/Torch/Transforms/Passes.h" #include "torch-mlir/Dialect/Torch/Utils/Utils.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include using namespace mlir; using namespace mlir::torch; using namespace mlir::torch::Torch; // Helper function to check whether the `dtype` is None or Float type. static bool isNoneOrFloatDtype(MLIRContext *context, Value dtype) { if (dtype.getType().isa()) return true; int64_t dtypeInt; if (!matchPattern(dtype, m_TorchConstantInt(&dtypeInt))) return false; FailureOr resDtype = getTypeForScalarType(context, (torch_upstream::ScalarType)dtypeInt); if (failed(resDtype)) return false; return resDtype->isa(); } // Helper function to compute the return type of the reduction function. // `dim` specifies the dimension to reduce and `keepDim` preserves the rank of // the input tensor. 
static Type computeReductionType(PatternRewriter &rewriter, Operation *op, BaseTensorType tensorType, Value dim, bool keepDim) { SmallVector sizes; int64_t dimInt; if (tensorType.hasSizes()) { ArrayRef inputShape = tensorType.getSizes(); int64_t inputRank = inputShape.size(); if (matchPattern(dim, m_TorchConstantInt(&dimInt))) { dimInt = toPositiveDim(dimInt, inputRank); if (!isValidDim(dimInt, inputRank)) { (void)rewriter.notifyMatchFailure(op, "dim is not a valid dim"); return nullptr; } sizes.append(inputShape.begin(), inputShape.end()); // The dimension to be reduced is set to 1 when `keepDim` is true else it // is removed. if (keepDim) sizes[dimInt] = 1; else sizes.erase(sizes.begin() + dimInt); } else { unsigned reducedRank = keepDim ? inputRank : inputRank - 1; sizes.resize(reducedRank, kUnknownSize); } } Type resultType = tensorType.getWithSizesAndDtype( sizes.size() == 0 ? std::optional>() : llvm::ArrayRef(sizes), tensorType.getOptionalDtype()); return resultType; } // Reduction function to calculate sum along given `dim`. static Value createSumAlongDimension(PatternRewriter &rewriter, Location loc, Operation *op, Value input, Value dim, bool keepDim) { Value dimList = rewriter.create( loc, Torch::ListType::get(dim.getType()), dim); Value keepDimCst = rewriter.create(loc, keepDim); Value dtype = rewriter.create(loc); Type resultType = computeReductionType( rewriter, op, input.getType().cast(), dim, keepDim); if (!resultType) return nullptr; return rewriter.create(loc, resultType, input, dimList, keepDimCst, dtype); } // Redunction function to calculate max along given `dim`. 
// Returns the `values` result of an `aten.max.dim` over `input`, or a null
// Value, without creating any op, when the result type cannot be computed.
static Value createMaxAlongDimension(PatternRewriter &rewriter, Location loc,
                                     Operation *op, Value input, Value dim,
                                     bool keepDim) {
  // `computeReductionType` returns a null Type on failure, so a plain `cast`
  // would assert before the null check below could run.
  BaseTensorType valueType =
      computeReductionType(rewriter, op,
                           input.getType().cast<BaseTensorType>(), dim, keepDim)
          .dyn_cast_or_null<BaseTensorType>();
  if (!valueType)
    return nullptr;
  // The indices result has the same shape as the values, with si64 elements.
  BaseTensorType indexType =
      valueType
          .getWithSizesAndDtype(
              !valueType.hasSizes() ? std::optional<ArrayRef<int64_t>>()
                                    : llvm::ArrayRef(valueType.getSizes()),
              IntegerType::get(op->getContext(), 64, IntegerType::Signed))
          .cast<BaseTensorType>();
  Value keepDimCst = rewriter.create<ConstantBoolOp>(loc, keepDim);
  return rewriter
      .create<AtenMaxDimOp>(loc, valueType, indexType, input, dim, keepDimCst)
      .getValues();
}

// Helper for creating `aten::sub_tensor_op`.
// Emits `lhs - 1.0 * rhs` with result type `tensorType`.
static Value createTensorSub(PatternRewriter &rewriter, Location loc,
                             Type tensorType, Value lhs, Value rhs) {
  Value alpha =
      rewriter.create<ConstantFloatOp>(loc, rewriter.getF64FloatAttr(1));
  Value sub =
      rewriter.create<AtenSubTensorOp>(loc, tensorType, lhs, rhs, alpha);
  return sub;
}

// Helper to create a tensor filled with the given scalar. Scalar would be
// converted to the element type of the given tensor type.
// `resultType` must have a dtype; `sizeList` is the list of result sizes.
static Value createInitTensor(PatternRewriter &rewriter, Location loc,
                              BaseTensorType resultType, Value scalar,
                              Value sizeList) {
  assert(resultType.hasDtype() && "result must have dtype");
  Value noneVal = rewriter.create<ConstantNoneOp>(loc);
  Value dtype = getDtypeIntValueForType(rewriter, loc, resultType.getDtype());
  return rewriter.create<AtenFullOp>(loc, resultType, sizeList, scalar, dtype,
                                     /*layout=*/noneVal,
                                     /*device=*/noneVal,
                                     /*pin_memory=*/noneVal);
}

// Helper to create a rank 0 tensor filled with the given `scalar`. `scalar`
// would be converted to the element type of the given `inputType`.
static Value createRank0Tensor(PatternRewriter &rewriter, Location loc, BaseTensorType inputType, Value scalar) { assert(inputType.hasDtype() && "input must have dtype"); SmallVector sizes; BaseTensorType rank0TensorTy = inputType.getWithSizesAndDtype(ArrayRef(sizes), inputType.getDtype()) .cast(); Value dimList = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(inputType.getContext())), ValueRange{}); return createInitTensor(rewriter, loc, rank0TensorTy, scalar, dimList); } // Share code between `softmax_backward` and `log_softmax_backward` ops. // Returns x - y * sum(z, dim). static Value createSoftmaxBackwardCommonKernel(PatternRewriter &rewriter, Location loc, Operation *op, Type tensorType, Value x, Value y, Value z, Value dim) { Value sum = createSumAlongDimension(rewriter, loc, op, z, dim, /*keepDim=*/true); if (!sum) return nullptr; auto broadcastSizeType = Torch::ListType::get(Torch::IntType::get(op->getContext())); Value broadcastSize = rewriter.create(loc, broadcastSizeType, z); Value sumBroadcast = rewriter.create(loc, tensorType, sum, broadcastSize); Value temp = rewriter.create(loc, tensorType, y, sumBroadcast); Value sub = createTensorSub(rewriter, loc, tensorType, x, temp); return sub; } static SmallVector computeDimsOrderForMoveDim(int64_t srcDimInt, int64_t dstDimInt, unsigned inputRank) { llvm::iota_range dimsOrderIR(0, inputRank, /*inclusive=*/false); SmallVector dimsOrder(dimsOrderIR.begin(), dimsOrderIR.end()); dimsOrder.erase(dimsOrder.begin() + srcDimInt); dimsOrder.insert(dimsOrder.begin() + dstDimInt, srcDimInt); return dimsOrder; } namespace { /// We decompose aten.amax into a set of aten.max.dim op(s) depending on the /// number of dimensions across which the max needs to be computed. 
/// Eg: /// INPUT: /// final_output = aten.amax(initial_input, dim=(0, 2, 1), keepdim=False) /// /// OUTPUT: /// input_1 = aten.max.dim(initial_input, 2, keepdim) #1 /// input_2 = aten.max.dim(input_1, 1, keepdim) #2 /// final_output = aten.max.dim(input_2, 0, keepdim) #3 /// /// NOTE: We iterate over, in reverse order, every dimension included in `dim` /// of the `aten.amax` op and create an `aten.amax.dim` op. /// Input tensor to the next `aten.amax.dim` op is thus the output of the /// previous `aten.amax.dim` op. class DecomposeAtenAmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenAmaxOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); SmallVector dims; if (!matchPattern(op.getDim(), m_TorchListOfConstantInts(dims))) return rewriter.notifyMatchFailure(op, "non-const dim parameter unsupported"); bool keepDim; if (!matchPattern(op.getKeepdim(), m_TorchConstantBool(&keepDim))) return rewriter.notifyMatchFailure( op, "Expected a constant boolean value for keepDim"); Value input = op.getSelf(); auto inputTy = input.getType().dyn_cast(); if (!inputTy || !inputTy.hasSizes()) { return rewriter.notifyMatchFailure(op, "Expected input type having sizes"); } // For every dimension included in `dim` of the op, iterated over in // reverse order, we create a call to aten.max.dim. std::sort(dims.begin(), dims.end()); std::reverse(dims.begin(), dims.end()); for (int64_t dimInt : dims) { int64_t inputRank = inputTy.getSizes().size(); dimInt = toPositiveDim(dimInt, inputRank); if (!isValidDim(dimInt, inputRank)) return rewriter.notifyMatchFailure(op, "dim is statically invalid"); Value dim = rewriter.create( loc, rewriter.getI64IntegerAttr(dimInt)); // The input to the next invocation of aten.max.dim is the output of the // previous aten.max.dim op. 
input = createMaxAlongDimension(rewriter, loc, op, input, dim, keepDim); } rewriter.replaceOp(op, input); return success(); } }; } // end namespace namespace { class DecomposeAtenSizeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSizeOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); MLIRContext *context = op.getContext(); std::optional maybeRank = getTensorRank(self); if (!maybeRank) return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor"); unsigned rank = *maybeRank; SmallVector sizes; for (unsigned i = 0; i < rank; i++) { Value dim = rewriter.create( loc, rewriter.getI64IntegerAttr(i)); sizes.push_back(rewriter.create(loc, self, dim)); } Value sizeList = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(context)), sizes); rewriter.replaceOp(op, sizeList); return success(); } }; } // namespace namespace { class DecomposeAtenSelectIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSelectIntOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value start = op.getIndex(); Value dim = op.getDim(); Value self = op.getSelf(); // convert `start` to non-negative: start += int(start < 0) * dimSize Value zero = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value isNegative = rewriter.create(loc, start, zero); isNegative = rewriter.create(loc, isNegative); Value dimSize = rewriter.create(loc, self, dim); Value indexOffset = rewriter.create(loc, isNegative, dimSize); start = rewriter.create(loc, start, indexOffset); Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value startPlusOne = rewriter.create(loc, one.getType(), start, one); Value slice = rewriter.create( loc, computeReductionType(rewriter, op, self.getType().cast(), dim, /*keepDim=*/true), op.getSelf(), dim, start, startPlusOne, /*step=*/one); // 
`aten.slice.tensor` doesn't squeeze the dim even when it's size 1 after // slicing, while `aten.select.int` does. rewriter.replaceOpWithNewOp(op, op.getResult().getType(), slice, op.getDim()); return success(); } }; } // namespace namespace { class DecomposeAtenNarrowOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenNarrowOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value start = op.getStart(); Value dim = op.getDim(); Value length = op.getLength(); Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); Value startPlusLength = rewriter.create(loc, one.getType(), start, length); rewriter.replaceOpWithNewOp( op, op.getResult().getType(), op.getSelf(), /*dim=*/dim, /*start=*/start, /*end=*/startPlusLength, /*step=*/one); return success(); } }; } // namespace namespace { // Decompose `aten.narrow.Tensor` to `aten.narrow` op class DecomposeAtenNarrowTensorOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenNarrowTensorOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); auto *context = op.getContext(); // PyTorch makes sure that `start` param is an 0-dim integral tensor. // REF: https://pytorch.org/docs/stable/generated/torch.narrow.html. 
auto start = rewriter.create( loc, Torch::IntType::get(context), op.getStart()); rewriter.replaceOpWithNewOp( op, op.getType(), op.getSelf(), op.getDim(), start, op.getLength()); return success(); } }; } // namespace namespace { class DecomposeAtenZeroOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenZeroOp op, PatternRewriter &rewriter) const override { Value zero = rewriter.create(op.getLoc(), rewriter.getI64IntegerAttr(0)); rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), zero); return success(); } }; } // namespace namespace { class DecomposeAtenIsnanOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenIsnanOp op, PatternRewriter &rewriter) const override { Value input = op.getSelf(); // Create a new aten.ne operation with the same type and input value. rewriter.replaceOpWithNewOp(op, op.getType(), input, input); return success(); } }; } // namespace namespace { class DecomposeAtenReshapeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenReshapeOp op, PatternRewriter &rewriter) const override { Value input = op.getSelf(); // TODO: Handle non value tensor type operands. if (!input.getType().isa()) { return rewriter.notifyMatchFailure( op, "unimplemented: only value tensor type operands are supported"); } rewriter.replaceOpWithNewOp(op, op.getType(), input, op.getShape()); return success(); } }; } // namespace // Calculates the softmax function on the given `input` tensor. Softmax(x) = // exp(x)/sum(exp(x)). 
// To avoid overflow we use the following decomposition rule: // x_max = max(input, dim, keepdim = True) // unnorm = aten.exp(input - x_max) // softmax = unnorm / sum(unnorm, dim, keepdim = True) template static Value getSoftmaxResult(OpTy op, Value self, Type resultType, PatternRewriter &rewriter) { Location loc = op.getLoc(); Value dim = op.getDim(); Value xMax = createMaxAlongDimension(rewriter, loc, op, self, dim, /*keepDim=*/true); if (!xMax) return nullptr; Value unNormalized = createTensorSub(rewriter, loc, resultType, self, xMax); Value unNormalizedExp = rewriter.create(loc, resultType, unNormalized); Value sum = createSumAlongDimension(rewriter, loc, op, unNormalizedExp, dim, /*keepDim=*/true); if (!sum) return nullptr; return rewriter.create(loc, resultType, unNormalizedExp, sum); } // Decompose softmax into: exp(x) / sum(exp(x)) namespace { class DecomposeAtenSoftmaxIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSoftmaxIntOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); BaseTensorType resultTensorType = op.getType().cast(); if (!resultTensorType.hasDtype()) { return rewriter.notifyMatchFailure( op, "expected result type to have a dtype"); } Type resultTensorDtype = resultTensorType.getDtype(); if (!resultTensorDtype.isa()) return rewriter.notifyMatchFailure(op, "Only support floating-point type"); // If `dtype` arg is non-none then convert the input to `dtype`. 
if (!op.getDtype().getType().isa()) { Location loc = op.getLoc(); Value none = rewriter.create(loc); Value cstFalse = rewriter.create(loc, false); self = rewriter.create( loc, resultTensorType, self, getDtypeIntValueForType(rewriter, loc, resultTensorDtype), /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/none); } Value result = getSoftmaxResult(op, self, resultTensorType, rewriter); if (!result) return failure(); rewriter.replaceOpWithNewOp(op, op.getType(), result); return success(); } }; } // namespace namespace { class DecomposeAten_SoftmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_SoftmaxOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); BaseTensorType tensorType = self.getType().cast(); if (!tensorType.hasDtype() || !tensorType.getDtype().isa()) return rewriter.notifyMatchFailure(op, "Only support floating type"); bool halfToFloat; if (!matchPattern(op.getHalfToFloat(), m_TorchConstantBool(&halfToFloat))) return rewriter.notifyMatchFailure( op, "Expected a boolean value for half_to_float"); BaseTensorType resultTensorType = op.getType().cast(); if (!resultTensorType.hasDtype()) { return rewriter.notifyMatchFailure( op, "expected result type to have a dtype"); } Type resultTensorDtype = resultTensorType.getDtype(); // `torch.ops.aten._softmax`'s softmax with half to float conversion is not // supported on CPU, but we go ahead with the decomposing. // TODO: Add an e2e test once upstream support is added. // If `half_to_float` is set, we convert the input's elemental type to match // that of output's. 
if (halfToFloat) { Location loc = op.getLoc(); Value none = rewriter.create(loc); Value cstFalse = rewriter.create(loc, false); self = rewriter.create( loc, resultTensorType, self, getDtypeIntValueForType(rewriter, loc, resultTensorDtype), /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/none); } Value result = getSoftmaxResult(op, self, resultTensorType, rewriter); if (!result) return op.emitError("failed to get softmax result"); rewriter.replaceOpWithNewOp(op, resultTensorType, result); return success(); } }; } // namespace // Aten_SoftmaxBackwardDataOp(gradOutput, output, dim) => // newGrad = gradOutput * output // result = newGrad - output * sum(newGrad, dim)) // // Refer to // https://github.com/pytorch/pytorch/blob/15fecc4c830a3907fde4b44c9962dc4144da50a4/torch/csrc/jit/codegen/cuda/ops/normalization.cpp#L31 namespace { class DecomposeAten_SoftmaxBackwardDataOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_SoftmaxBackwardDataOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); Value output = op.getOutput(); Value dim = op.getDim(); BaseTensorType tensorType = gradOutput.getType().cast(); if (!tensorType.hasDtype() || !tensorType.getDtype().isa()) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value newGrad = rewriter.create(loc, tensorType, gradOutput, output); Value result = createSoftmaxBackwardCommonKernel( rewriter, loc, op, tensorType, newGrad, output, newGrad, dim); if (!result) return rewriter.notifyMatchFailure( op, "nullptr returned by createSoftmaxBackwardCommonKernel function."); rewriter.replaceOp(op, result); return success(); } }; } // namespace // AtenTanhBackwardOp(gradOutput, output) => // result = gradOutput * (1 - output^2) // To get away from broadcasts the above formula is expanded i.e., // result = gradOutput - (gradOutput * output^2) namespace { class 
DecomposeAtenTanhBackwardOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenTanhBackwardOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); // `output` is the value flowing out from tanh. Hence, tanh(x) = output. // Since, dTanh(x) = (1 - tanh(x)^2) hence, dOutput = (1 - output^2). Value output = op.getOutput(); BaseTensorType tensorType = gradOutput.getType().cast(); if (!tensorType.hasDtype() || !tensorType.getDtype().isa()) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value tanhSquare = rewriter.create(loc, tensorType, output, output); Value gradMulTanhSquare = rewriter.create( loc, tensorType, tanhSquare, gradOutput); Value newGrad = createTensorSub(rewriter, loc, tensorType, gradOutput, gradMulTanhSquare); rewriter.replaceOp(op, newGrad); return success(); } }; } // namespace // Aten_LogSoftmaxBackwardDataOp(gradOutput, output, dim) => // result = gradOutput - (exp(output) * sum(gradOutput, dim)) namespace { class DecomposeAten_LogSoftmaxBackwardDataOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_LogSoftmaxBackwardDataOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value gradOutput = op.getGradOutput(); Value output = op.getOutput(); Value dim = op.getDim(); BaseTensorType tensorType = gradOutput.getType().cast(); if (!tensorType.hasDtype() || !tensorType.getDtype().isa()) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value expOut = rewriter.create(loc, tensorType, output); Value result = createSoftmaxBackwardCommonKernel( rewriter, loc, op, tensorType, gradOutput, expOut, gradOutput, dim); if (!result) return rewriter.notifyMatchFailure( op, "nullptr returned by createSoftmaxBackwardCommonKernel function."); rewriter.replaceOp(op, result); return success(); } }; } // namespace // 
Decompose `AtenArgMaxOp` into `AtenMaxDimOp`. namespace { class DecomposeAtenArgMaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenArgmaxOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getSelf(); Value dim = op.getDim(); Value keepDim = op.getKeepdim(); Value result = op.getResult(); BaseTensorType inputType = input.getType().cast(); BaseTensorType indicesTensorType = result.getType().cast(); std::optional maybeInputRank = getTensorRank(input); if (!maybeInputRank) { return rewriter.notifyMatchFailure( op, "expected input tensor to have a rank"); } unsigned inputRank = *maybeInputRank; if (!indicesTensorType.hasSizes()) return failure(); BaseTensorType valueTensorType = inputType .getWithSizesAndDtype(indicesTensorType.getOptionalSizes(), inputType.getOptionalDtype()) .cast(); // If the dim type is `NoneType` i.e. reduce along all the dimensions. // `AtenMaxDimOp` doesn't support dim as `NoneType` so first the input // tensor is flattened to 1d tensor and then the reduction happens on the // 0th dimension. 
if (dim.getType().isa()) { BaseTensorType flattenType = inputType .getWithSizesAndDtype({kUnknownSize}, inputType.getOptionalDtype()) .cast(); dim = rewriter.create(loc, rewriter.getI64IntegerAttr(0)); Value end = rewriter.create( loc, rewriter.getI64IntegerAttr(inputRank - 1)); input = rewriter.create(loc, flattenType, input, dim, end); } Value maxResult = rewriter .create(loc, valueTensorType, indicesTensorType, input, dim, keepDim) .getIndices(); rewriter.replaceOp(op, maxResult); return success(); } }; } // namespace // Decompose `aten.bucketize` into the following op sequence: // // def aten_bucketize(input, boundaries, out_int32, right): // unsqz_input = input.unsqueeze(-1) // if not right: // comparison = unsqz_input <= boundaries // else: // comparison = unsqz_input < boundaries // indices = torch.argmax(comparison.float(), dim=-1) // within_bound = comparison[..., -1] // result = torch.where(within_bound, indices, boundaries.shape[0]) // if out_int32: // result = result.int() // return result // namespace { class DecomposeAtenBucketizeTensorOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenBucketizeTensorOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getSelf(); auto inputType = input.getType().cast(); if (!inputType.hasSizes()) { return rewriter.notifyMatchFailure( op, "unimplemented: input must have known sizes"); } ArrayRef inputShape = inputType.getSizes(); Value boundaries = op.getBoundaries(); auto boundariesType = boundaries.getType().cast(); if (!boundariesType.hasSizes() || boundariesType.getSizes().size() != 1) { return rewriter.notifyMatchFailure(op, "unimplemented: boundaries must have " "known sizes and must be a 1D array"); } int64_t boundariesSize = boundariesType.getSizes()[0]; bool outInt32; if (!matchPattern(op.getOutInt32(), m_TorchConstantBool(&outInt32))) { return rewriter.notifyMatchFailure( op, "unimplemented: out_int32 
must be a constant bool"); } bool right; if (!matchPattern(op.getRight(), m_TorchConstantBool(&right))) { return rewriter.notifyMatchFailure( op, "unimplemented: right must be a constant bool"); } // unsqueeze input at the last dim to make it broadcastable with boundaries Value constMinusOne = rewriter.create( loc, rewriter.getI64IntegerAttr(-1)); auto unsqzTensorInfo = unsqueezeTensor(rewriter, op, input, /*dim=*/constMinusOne); if (failed(unsqzTensorInfo)) { return rewriter.notifyMatchFailure(op, "cannot generate unsqueeze tensor"); } Value unsqzInput = *unsqzTensorInfo; // compare unsqueezed input with boundaries SmallVector compareShape(inputShape); compareShape.push_back(boundariesSize); Type compareType = inputType.getWithSizesAndDtype(compareShape, rewriter.getI1Type()); Value compare; if (!right) { compare = rewriter.create(loc, compareType, unsqzInput, boundaries); } else { compare = rewriter.create(loc, compareType, unsqzInput, boundaries); } // convert the comparison results to float32 as the argmax op input, // which does not support integer dtype in LINALG backend Value compareF32 = convertTensorToDtype(rewriter, loc, compare, rewriter.getF32Type()); // get the first boundary index where the input element is less than (or // equal to) the boundary value Type indicesType = inputType.getWithSizesAndDtype( inputShape, rewriter.getIntegerType(64, IntegerType::Signed)); Value constFalse = rewriter.create(loc, false); Value indices = rewriter.create(loc, indicesType, compareF32, /*dim=*/constMinusOne, /*keepdim=*/constFalse); // get the comparison results between each input element and the rightmost // boundary value Type withinUpperBoundType = inputType.getWithSizesAndDtype(inputShape, rewriter.getI1Type()); Value withinUpperBound = rewriter.create( loc, withinUpperBoundType, compare, /*dim=*/constMinusOne, /*index=*/constMinusOne); // If the input element is less than (or equal to) the rightmost boundary, // take the max index as result. 
Otherwise, the element is beyond the // rightmost boundary, so take the boundary size. Value constZero = rewriter.create( loc, rewriter.getI64IntegerAttr(0)); Value upperBound = rewriter.create(loc, boundaries, /*dim=*/constZero); Value result = rewriter.create( loc, indicesType, withinUpperBound, indices, upperBound); if (outInt32) { result = convertTensorToDtype( rewriter, loc, result, rewriter.getIntegerType(32, IntegerType::Signed)); } rewriter.replaceOp(op, result); return success(); } }; } // namespace // To avoid overflow we use the following decomposition rule: // x_max = aten.max(x, dim, keepdim=True)[0] // shifted = x - x_max // shifted_logsumexp = aten.log(aten.sum(aten.exp(shifted), dim, keepdim=True)) // log_softmax = shifted - shifted_logsumexp template static Value getLogSoftmaxResult(OpTy op, PatternRewriter &rewriter) { Location loc = op.getLoc(); Value dim = op.getDim(); Value self = op.getSelf(); BaseTensorType tensorType = self.getType().cast(); Value xMax = createMaxAlongDimension(rewriter, loc, op, self, dim, /*keepDim=*/true); if (!xMax) return nullptr; Value shifted = createTensorSub(rewriter, loc, tensorType, self, xMax); Value shiftedExp = rewriter.create(loc, tensorType, shifted); Value shiftedSumExp = createSumAlongDimension(rewriter, loc, op, shiftedExp, dim, /*keepDim=*/true); if (!shiftedSumExp) return nullptr; Value shiftedLogSumExp = rewriter.create(loc, shiftedSumExp.getType(), shiftedSumExp); Value result = createTensorSub(rewriter, loc, op.getType(), shifted, shiftedLogSumExp); return result; } namespace { class DecomposeAtenLogSoftmaxIntOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenLogSoftmaxIntOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); if (!op.getDtype().getType().isa()) return rewriter.notifyMatchFailure( op, "Unimplemented non-None dtype for log_softmax"); BaseTensorType tensorType = self.getType().cast(); if 
(!tensorType.hasDtype() || !tensorType.getDtype().isa()) return rewriter.notifyMatchFailure(op, "Only support floating type"); Value logSoftmax = getLogSoftmaxResult(op, rewriter); if (!logSoftmax) return rewriter.notifyMatchFailure( op, "getLogSoftmaxResult function returned nullptr"); rewriter.replaceOp(op, logSoftmax); return success(); } }; } // namespace namespace { class DecomposeAten_LogSoftmaxOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(Aten_LogSoftmaxOp op, PatternRewriter &rewriter) const override { bool halfToFloat; if (!matchPattern(op.getHalfToFloat(), m_TorchConstantBool(&halfToFloat))) return rewriter.notifyMatchFailure( op, "Expected a boolean value for half_to_float"); // Currently, setting `halfToFloat` is not supported as the E2E testing for // the same is not present on CPU. if (halfToFloat) return rewriter.notifyMatchFailure( op, "halfToFloat is currently not supported."); Value _logSoftmax = getLogSoftmaxResult(op, rewriter); if (!_logSoftmax) return rewriter.notifyMatchFailure( op, "getLogSoftmaxResult function returned nullptr"); rewriter.replaceOp(op, _logSoftmax); return success(); } }; } // namespace // Decompose aten.matmul into: aten.mm and aten.bmm according to ranks. namespace { class DecomposeAtenMatmulOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMatmulOp op, PatternRewriter &rewriter) const override { Value lhs = op.getSelf(); Value rhs = op.getOther(); std::optional maybeLhsRank = getTensorRank(lhs); std::optional maybeRhsRank = getTensorRank(rhs); if (!maybeLhsRank || !maybeRhsRank) { return rewriter.notifyMatchFailure( op, "expected input tensors to have a rank"); } unsigned lhsRank = *maybeLhsRank; unsigned rhsRank = *maybeRhsRank; if (lhsRank == 2 && rhsRank == 2) { // If both lhs and rhs ranks are 2 then map it to `aten.mm` op. 
rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs);
    } else if (lhsRank == 3 && rhsRank == 3) {
      // If both lhs and rhs ranks are 3 then map it to `aten.bmm` op.
      rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs);
    } else {
      return failure();
    }

    return success();
  }
};
} // namespace

// Decompose aten.mv into: aten.matmul.
//
// `self` and `vec` are forwarded unchanged as the two operands of the
// replacement op, which reuses the original result type. The pattern never
// fails.
namespace {
class DecomposeAtenMvOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenMvOp op,
                                PatternRewriter &rewriter) const override {
    Value lhs = op.getSelf();
    Value rhs = op.getVec();
    rewriter.replaceOpWithNewOp(op, op.getType(), lhs, rhs);
    return success();
  }
};
} // namespace

// ReLU6(x) = min(max(0, x), 6) = min(Relu(x), 6)
//
// Emits the ops computing ReLU6 of `input` at `loc` and returns the final
// value. Both tensor ops are created with `input`'s own type; the scalar 6 is
// wrapped by `createRank0Tensor` before it is combined with the ReLU result.
static Value getRelu6Results(PatternRewriter &rewriter, Location loc,
                             Value input) {
  BaseTensorType inputType = input.getType().cast();

  Value relu = rewriter.create(loc, inputType, input);
  Value cst6 =
      rewriter.create(loc, rewriter.getI64IntegerAttr(6));
  Value sixTensor = createRank0Tensor(rewriter, loc, inputType, cst6);
  Value relu6Out =
      rewriter.create(loc, inputType, relu, sixTensor);
  return relu6Out;
}

// Replace aten.relu6 with the op sequence built by `getRelu6Results`.
// Fails to match when the result type has no dtype.
namespace {
class DecomposeAtenRelu6Op : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenRelu6Op op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }

    Value relu6 = getRelu6Results(rewriter, loc, op.getSelf());
    rewriter.replaceOp(op, relu6);
    return success();
  }
};
} // namespace

// Hardswish(x) = x * Relu6(x+3)/6
//
// Every intermediate tensor is typed with the input's type.
// NOTE(review): unlike the Relu6 pattern there is no dtype guard here even
// though `getRelu6Results` is called on the intermediate -- confirm whether a
// dtype-less input can reach this pattern.
namespace {
class DecomposeAtenHardswishOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenHardswishOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = op.getSelf();
    Type inputType = input.getType();

    Value constantOne = rewriter.create(
        loc, rewriter.getI64IntegerAttr(1));
    Value constantThree = rewriter.create(
        loc, rewriter.getI64IntegerAttr(3));
    Value constantSix = rewriter.create(
        loc, rewriter.getI64IntegerAttr(6));
    // x + 3 (alpha = 1).
    Value inputPlusThree = rewriter.create(
        loc, inputType, input, constantThree,
        /*alpha=*/constantOne);
    Value relu6 = getRelu6Results(rewriter, loc, inputPlusThree);
    // Relu6(x + 3) / 6, then multiplied by x.
    Value divTensor =
        rewriter.create(loc, inputType, relu6, constantSix);
    Value mulTensor =
        rewriter.create(loc, inputType, divTensor, input);

    rewriter.replaceOp(op, mulTensor);
    return success();
  }
};
} // namespace

// LeakyRelu = max(0,x) + negative_slope * min(0,x)
//
// Fails to match when the result type has no dtype. The zero operand is a
// rank-0 tensor built from an integer constant 0 via `createRank0Tensor`.
namespace {
class DecomposeAtenLeakyReluOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenLeakyReluOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = op.getSelf();
    Value negativeSlope = op.getNegativeSlope();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }

    Value constantZero =
        rewriter.create(loc, rewriter.getI64IntegerAttr(0));
    // Float 1.0, used as the `alpha` of the final addition.
    Value constantOne =
        rewriter.create(loc, rewriter.getF64FloatAttr(1.0));
    Value zeroTensor = createRank0Tensor(rewriter, loc, resType, constantZero);
    Value positiveOutput =
        rewriter.create(loc, resType, zeroTensor, input);
    Value negativeOutput =
        rewriter.create(loc, resType, zeroTensor, input);
    Value scaledNegativeOutput = rewriter.create(
        loc, resType, negativeOutput, negativeSlope);
    Value leakyReluOutput = rewriter.create(
        loc, resType, positiveOutput, scaledNegativeOutput, constantOne);

    rewriter.replaceOp(op, leakyReluOutput);
    return success();
  }
};
} // namespace

// LeakyReluBackward = max(0,grad) + negative_slope * min(0,x)
//
// Only handles a constant `self_is_result == false`; fails to match otherwise
// or when the result type has no dtype.
// NOTE(review): the formula above combines `grad` and `x` additively, whereas
// the usual leaky-relu gradient is `grad * (x > 0 ? 1 : negative_slope)`.
// Confirm that this decomposition is what is intended.
namespace {
class DecomposeAtenLeakyReluBackwardOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenLeakyReluBackwardOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value gradOutput = op.getGradOutput();
    Value input = op.getSelf();
    Value negativeSlope = op.getNegativeSlope();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }

    bool selfIsResult = false;
    if (!matchPattern(op.getSelfIsResult(),
                      m_TorchConstantBool(&selfIsResult)) ||
        selfIsResult)
      return rewriter.notifyMatchFailure(
          op, "unimplemented: self_is_result should be false");

    Value constantZero =
        rewriter.create(loc, rewriter.getI64IntegerAttr(0));
    Value constantOne =
        rewriter.create(loc, rewriter.getF64FloatAttr(1.0));
    Value zeroTensor = createRank0Tensor(rewriter, loc, resType, constantZero);
    // Note the asymmetry: the first term is built from `gradOutput`, the
    // second from `input`.
    Value positiveOutput =
        rewriter.create(loc, resType, zeroTensor, gradOutput);
    Value negativeOutput =
        rewriter.create(loc, resType, zeroTensor, input);
    Value scaledNegativeOutput = rewriter.create(
        loc, resType, negativeOutput, negativeSlope);
    Value leakyReluBackwardOutput = rewriter.create(
        loc, resType, positiveOutput, scaledNegativeOutput, constantOne);

    rewriter.replaceOp(op, leakyReluBackwardOutput);
    return success();
  }
};
} // namespace

// Elu = scale * max(0,x) + alpha * scale * (exp(min(0,x) * input_scale) - 1)
//
// Fails to match when the result type has no dtype. All intermediates are
// typed with the result type.
namespace {
class DecomposeAtenEluOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenEluOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = op.getSelf();
    Value alpha = op.getAlpha();
    Value scale = op.getScale();
    Value inputScale = op.getInputScale();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }

    Value constantZero =
        rewriter.create(loc, rewriter.getI64IntegerAttr(0));
    Value constantOne =
        rewriter.create(loc, rewriter.getF64FloatAttr(1.0));
    Value zeroTensor = createRank0Tensor(rewriter, loc, resType, constantZero);
    // Positive branch: scale * max(0, x).
    Value maxZeroX =
        rewriter.create(loc, resType, zeroTensor, input);
    Value positiveOutput =
        rewriter.create(loc, resType, maxZeroX, scale);
    // Negative branch: alpha * scale * (exp(min(0, x) * input_scale) - 1).
    Value minZeroX =
        rewriter.create(loc, resType, zeroTensor, input);
    Value scaledMinZeroX =
        rewriter.create(loc, resType, minZeroX, inputScale);
    Value expX = rewriter.create(loc, resType, scaledMinZeroX);
    Value expXM1 = rewriter.create(loc, resType, expX,
                                   constantOne, constantOne);
    Value scaledExpXM1 =
        rewriter.create(loc, resType, expXM1, scale);
    Value negativeOutput =
        rewriter.create(loc, resType, scaledExpXM1, alpha);

    Value eluOutput = rewriter.create(
        loc, resType, positiveOutput, negativeOutput, constantOne);

    rewriter.replaceOp(op, eluOutput);
    return success();
  }
};
} // namespace

// Decompose aten.t.
//   - unknown rank: match failure.
//   - rank > 2: match failure carrying the "t() expects ..." message.
//   - rank < 2: the op is replaced by its own input.
//   - rank == 2: the op is replaced by a new op taking (self, 0, 1).
namespace {
class DecomposeAtenTOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenTOp op,
                                PatternRewriter &rewriter) const override {
    Value lhs = op.getSelf();
    std::optional lhsRank = getTensorRank(lhs);
    auto loc = op.getLoc();

    if (!lhsRank) {
      return rewriter.notifyMatchFailure(op, "expected input to have a rank");
    } else if (*lhsRank > 2) {
      std::string errorMessage =
          "t() expects a tensor with <=2 dimensions, but self is " +
          std::to_string(*lhsRank) + "D";
      return rewriter.notifyMatchFailure(op, errorMessage.c_str());
    } else if (*lhsRank < 2)
      rewriter.replaceOp(op, lhs);
    else {
      Value zero =
          rewriter.create(loc, rewriter.getI64IntegerAttr(0));
      Value one =
          rewriter.create(loc, rewriter.getI64IntegerAttr(1));
      rewriter.replaceOpWithNewOp(op, op.getType(), lhs, zero, one);
    }
    return success();
  }
};
} // namespace

// Decompose `aten.stack` into `aten.unsqueeze` and `aten.cat`.
// Every tensor of the input list is unsqueezed at `dim` via `unsqueezeTensor`
// and the results are gathered into a new list that feeds the replacement op
// (same `dim`, original result type).
//
// Match failures:
//   - the tensor list does not come from a list construct;
//   - any tensor has no known sizes;
//   - `unsqueezeTensor` fails for any tensor.
namespace {
class DecomposeAtenStackOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenStackOp op,
                                PatternRewriter &rewriter) const override {
    SmallVector tensors;
    if (!getListConstructElements(op.getTensors(), tensors)) {
      return rewriter.notifyMatchFailure(
          op, "unimplemented: the tensor list is not from list construct");
    }
    // Ensure all tensors have known sizes
    // (checked for the whole list before anything is unsqueezed).
    for (Value tensor : tensors) {
      BaseTensorType tensorType = tensor.getType().cast();
      if (!tensorType.hasSizes()) {
        return rewriter.notifyMatchFailure(
            op, "unimplemented: one tensor does not have known sizes");
      }
    }

    SmallVector unsqueezedTensors;
    for (Value tensor : tensors) {
      auto unsqueezedInfo = unsqueezeTensor(rewriter, op, tensor, op.getDim());
      if (failed(unsqueezedInfo)) {
        return rewriter.notifyMatchFailure(
            op, "cannot generate unsqueeze tensor op");
      }
      unsqueezedTensors.push_back(*unsqueezedInfo);
    }

    // The list element type is the result tensor type with both sizes and
    // dtype erased.
    Type listElemType =
        op.getType().cast().getWithSizesAndDtype(
            /*optionalSizes=*/std::nullopt, /*optionalDtype=*/nullptr);
    Type listType = Torch::ListType::get(listElemType);
    Value unsqueezedTensorList = rewriter.create(
        op.getLoc(), listType, unsqueezedTensors);
    rewriter.replaceOpWithNewOp(op, op.getType(),
                                unsqueezedTensorList, op.getDim());
    return success();
  }
};
} // namespace

// Decompose aten.roll into aten.slice and aten.cat ops.
// https://pytorch.org/docs/stable/generated/torch.roll.html namespace { class DecomposeAtenRollOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenRollOp op, PatternRewriter &rewriter) const override { SmallVector shifts; if (!getListConstructElements(op.getShifts(), shifts)) return rewriter.notifyMatchFailure( op, "unimplemented: shifts not list of Scalar"); SmallVector dims; if (!getListConstructElements(op.getDims(), dims)) return rewriter.notifyMatchFailure( op, "unimplemented: dims not list of Scalar"); if (shifts.size() != dims.size()) return op.emitError("list sizes of shifts and dims are not the same"); auto loc = op.getLoc(); Value constNone = rewriter.create(loc); Value constZero = rewriter.create( loc, rewriter.getI64IntegerAttr(0)); Value constOne = rewriter.create( loc, rewriter.getI64IntegerAttr(1)); auto self = op.getSelf(); auto selfTy = self.getType().cast(); // roll(input, shift, dim) = cat({ // slice(input, dim, -shift, none), // slice(input, dim, 0, -shift)}, dim) auto imitateRoll = [&](Value input, Value shift, Value dim, int64_t cstDim) { Value negShift = rewriter.create(loc, shift); ArrayRef inputShape = selfTy.getSizes(); SmallVector sizes; sizes.append(inputShape.begin(), inputShape.end()); sizes[cstDim] = kUnknownSize; Type sliceTy = selfTy.getWithSizesAndDtype(llvm::ArrayRef(sizes), selfTy.getOptionalDtype()); Value slice0 = rewriter.create( loc, sliceTy, input, dim, negShift, constNone, constOne); Value slice1 = rewriter.create( loc, sliceTy, input, dim, constZero, negShift, constOne); Type listType = Torch::ListType::get(sliceTy); Value slices = rewriter.create( loc, listType, llvm::ArrayRef{slice0, slice1}); return rewriter.create(loc, self.getType(), slices, dim); }; std::optional maybeRank = getTensorRank(self); if (!maybeRank) return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor"); unsigned rank = *maybeRank; Value output = self; auto nShifts = 
shifts.size(); for (size_t k = 0; k < nShifts; ++k) { auto dim = dims[k]; int64_t cstDim = -1; if (!matchPattern(dim, m_TorchConstantInt(&cstDim))) return rewriter.notifyMatchFailure( op, "unimplemented: dim must be constant"); cstDim = toPositiveDim(cstDim, rank); output = imitateRoll(output, shifts[k], dim, cstDim); } rewriter.replaceOp(op, output); return success(); } }; } // namespace // Decompose aten.repeat into aten.expand and aten.view ops. // // Ref: https://pytorch.org/docs/stable/generated/torch.Tensor.repeat.html // // For shape [S1, S2, S3] and repeats [M0, M1, M2, M3] // MS0 = M0; MS1 = M1 * S1; MS2 = M2 * S2; MS3 = M3 * S3 // // def aten_repeat(self, repeats): // sizes = self.size() // unsqueezed_sizes = [] // expanded_sizes = [] // reshape_sizes = [] // leading_rank = repeats.size() - sizes.size() // for r in range(leading_rank): // unsqueezed_sizes.append(1) // expanded_sizes.append(repeats[r]) // reshaped_sizes.append(repeats[r]) // // for s, m in zip(sizes, repeats[leading_rank:]): // unsqueezed_sizes += [1, s] // expanded_sizes += [m, s] // reshaped_sizes += [m * s] // return // self.view(unsqueezed_sizes).expand(expanded_sizes).view(reshaped_sizes) // namespace { class DecomposeAtenRepeatOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenRepeatOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); MLIRContext *context = op.getContext(); std::optional maybeRank = getTensorRank(self); if (!maybeRank) return rewriter.notifyMatchFailure(op, "Unimplemented: unranked tensor"); unsigned rank = *maybeRank; SmallVector repeats; if (!getListConstructElements(op.getRepeats(), repeats)) return rewriter.notifyMatchFailure( op, "Unimplemented: repeats not list of Scalar"); if (rank > repeats.size()) { return rewriter.notifyMatchFailure( op, "repeats are not matched with self's rank"); } auto insertDimSizes = [](SmallVector &dimSizes, SmallVector 
&shape, const ArrayRef &vals) { dimSizes.insert(dimSizes.end(), vals.begin(), vals.end()); std::transform(vals.begin(), vals.end(), std::back_inserter(shape), [&](Value val) -> int64_t { int64_t cst_val; if (matchPattern(val, m_TorchConstantInt(&cst_val))) { return cst_val; } else { return kUnknownSize; } }); }; Value one = rewriter.create( loc, rewriter.getI64IntegerAttr(1)); SmallVector unsqueezedSizes, expandedSizes, reshapedSizes; SmallVector unsqueezedIntSizes, expandedIntSizes; assert(repeats.size() >= rank && "leadingRank should greater than 0"); auto leadingRank = repeats.size() - rank; for (size_t i = 0; i < leadingRank; ++i) { insertDimSizes(unsqueezedSizes, unsqueezedIntSizes, ArrayRef{one}); insertDimSizes(expandedSizes, expandedIntSizes, ArrayRef{repeats[i]}); reshapedSizes.push_back(repeats[i]); } auto selfType = self.getType().dyn_cast(); auto selfShape = selfType.getSizes(); for (unsigned i = 0; i < rank; i++) { auto scale = repeats[i + leadingRank]; Value dimSize; if (selfShape[i] == kUnknownSize) { Value dim = rewriter.create( loc, rewriter.getI64IntegerAttr(i)); dimSize = rewriter.create(loc, self, dim); } else { dimSize = rewriter.create( loc, rewriter.getI64IntegerAttr(selfShape[i])); } insertDimSizes(unsqueezedSizes, unsqueezedIntSizes, ArrayRef{one, dimSize}); insertDimSizes(expandedSizes, expandedIntSizes, ArrayRef{scale, dimSize}); Value scaledSize = rewriter.create(loc, dimSize, scale); reshapedSizes.push_back(scaledSize); } Type dtype = self.getType().cast().getOptionalDtype(); Type unsqueezedType = ValueTensorType::get( context, llvm::ArrayRef(unsqueezedIntSizes), dtype); Type expandedType = ValueTensorType::get(context, llvm::ArrayRef(expandedIntSizes), dtype); auto listType = Torch::ListType::get(Torch::IntType::get(op.getContext())); Value unsqueezedDims = rewriter.create(loc, listType, unsqueezedSizes); Value expandedDims = rewriter.create(loc, listType, expandedSizes); Value reshapedDims = rewriter.create(loc, listType, 
reshapedSizes); auto reshaped = rewriter.create(loc, unsqueezedType, op.getSelf(), unsqueezedDims); auto expanded = rewriter.create(loc, expandedType, reshaped, expandedDims); rewriter.replaceOpWithNewOp(op, op.getType(), expanded, reshapedDims); return success(); } }; } // namespace // Decompose aten.flatten.using_ints into aten.view op. namespace { class DecomposeAtenFlattenUsingIntsOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenFlattenUsingIntsOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value self = op.getSelf(); MLIRContext *context = op.getContext(); std::optional maybeRank = getTensorRank(self); if (!maybeRank) return rewriter.notifyMatchFailure(op, "unimplemented: unranked tensor"); unsigned rank = *maybeRank; int64_t start, end; if (!matchPattern(op.getStartDim(), m_TorchConstantInt(&start)) || !matchPattern(op.getEndDim(), m_TorchConstantInt(&end))) { return rewriter.notifyMatchFailure( op, "unimplemented: requires start and end dims to be constants"); } SmallVector newSizes; if (rank == 0) { Value one = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); newSizes.push_back(one); } else { start = toPositiveDim(start, rank); end = toPositiveDim(end, rank); if (start > end) { return rewriter.notifyMatchFailure( op, "expected end dim larger than start dim"); } newSizes.reserve(rank - end + start); for (int64_t k = 0; k < start; ++k) { Value dim = rewriter.create(loc, rewriter.getI64IntegerAttr(k)); newSizes.push_back( rewriter.create(loc, self, /*dim=*/dim)); } Value flattenDimSize = rewriter.create(loc, rewriter.getI64IntegerAttr(-1)); newSizes.push_back(flattenDimSize); for (int64_t k = end + 1; k < rank; ++k) { Value dim = rewriter.create(loc, rewriter.getI64IntegerAttr(k)); newSizes.push_back( rewriter.create(loc, self, /*dim=*/dim)); } } Value newSizeList = rewriter.create( loc, ListType::get(IntType::get(context)), newSizes); 
rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), newSizeList); return success(); } }; } // namespace // Decompose aten.expand into aten.broadcast_to op. namespace { class DecomposeAtenExpandOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenExpandOp op, PatternRewriter &rewriter) const override { bool implicit = false; if (!matchPattern(op.getImplicit(), m_TorchConstantBool(&implicit)) || implicit) { return rewriter.notifyMatchFailure( op, "unimplemented: requires implicit to be false"); } rewriter.replaceOpWithNewOp(op, op.getType(), op.getSelf(), op.getSize()); return success(); } }; } // namespace // Decompose aten.where.Scalar into aten.where.self op. namespace { class DecomposeAtenWhereScalarOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenWhereScalarOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); auto resType = op.getType().cast(); if (!resType.hasDtype()) { return rewriter.notifyMatchFailure(op, "result should have dtype"); } Value selfTensor = createRank0Tensor(rewriter, loc, resType, op.getSelf()); Value otherTensor = createRank0Tensor(rewriter, loc, resType, op.getOther()); rewriter.replaceOpWithNewOp(op, resType, op.getCondition(), selfTensor, otherTensor); return success(); } }; } // namespace // Decompose aten.where.ScalarOther into aten.where.self op. 
// Only `other` is a scalar here: it is wrapped into a rank-0 tensor of the
// result type, while `condition` and `self` are forwarded unchanged.
// Fails to match when the result type has no dtype.
namespace {
class DecomposeAtenWhereScalarOtherOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenWhereScalarOtherOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }
    Value otherTensor =
        createRank0Tensor(rewriter, loc, resType, op.getOther());
    rewriter.replaceOpWithNewOp(op, resType, op.getCondition(),
                                op.getSelf(), otherTensor);
    return success();
  }
};
} // namespace

// Decompose aten.where.ScalarSelf into aten.where.self op.
//
// Only `self` is a scalar here: it is wrapped into a rank-0 tensor of the
// result type, while `condition` and `other` are forwarded unchanged.
// Fails to match when the result type has no dtype.
namespace {
class DecomposeAtenWhereScalarSelfOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenWhereScalarSelfOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    auto resType = op.getType().cast();
    if (!resType.hasDtype()) {
      return rewriter.notifyMatchFailure(op, "result should have dtype");
    }
    Value selfTensor = createRank0Tensor(rewriter, loc, resType, op.getSelf());
    rewriter.replaceOpWithNewOp(op, resType, op.getCondition(),
                                selfTensor, op.getOther());
    return success();
  }
};
} // namespace

// Decompose aten.masked_fill.Scalar into aten.where.self op.
namespace { class DecomposeAtenMaskedFillScalarOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMaskedFillScalarOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); auto resType = op.getType().cast(); if (!resType.hasDtype()) { return rewriter.notifyMatchFailure(op, "result should have dtype"); } Value mask = op.getMask(); Value value = createRank0Tensor(rewriter, loc, resType, op.getValue()); rewriter.replaceOpWithNewOp(op, resType, mask, value, op.getSelf()); return success(); } }; } // namespace // Decompose aten._convolution-like to aten.convolution namespace { template class DecomposeAten_ConvolutionLikeOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ConvolutionLikeOp op, PatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp( op, op->getResultTypes(), op.getInput(), op.getWeight(), op.getBias(), op.getStride(), op.getPadding(), op.getDilation(), op.getTransposed(), op.getOutputPadding(), op.getGroups()); return success(); } }; } // namespace // Decompose aten.conv2d to aten.convolution namespace { class DecomposeAtenConv2dOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenConv2dOp op, PatternRewriter &rewriter) const override { Value emptyList = rewriter.create( op.getLoc(), Torch::ListType::get(Torch::IntType::get(op.getContext())), SmallVector()); Value cstFalse = rewriter.create(op.getLoc(), false); rewriter.replaceOpWithNewOp( op, op->getResultTypes(), op.getInput(), op.getWeight(), op.getBias(), op.getStride(), op.getPadding(), op.getDilation(), cstFalse, emptyList, op.getGroups()); return success(); } }; } // namespace // Decompose aten.conv_transpose2d to aten.convolution namespace { class DecomposeAtenConvTranspose2dOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult 
matchAndRewrite(AtenConvTranspose2dInputOp op, PatternRewriter &rewriter) const override { Value cstTrue = rewriter.create(op.getLoc(), true); rewriter.replaceOpWithNewOp( op, op->getResultTypes(), op.getInput(), op.getWeight(), op.getBias(), op.getStride(), op.getPadding(), op.getDilation(), /*transposed=*/cstTrue, op.getOutputPadding(), op.getGroups()); return success(); } }; } // namespace static LogicalResult getTransposedType(BaseTensorType inType, int64_t dimA, int64_t dimB, Type &transposedType) { if (!inType.hasSizes()) return failure(); SmallVector shape(inType.getSizes()); int64_t tmp = shape[0]; shape[0] = shape[1]; shape[1] = tmp; transposedType = inType.getWithSizesAndDtype(llvm::ArrayRef(shape), inType.getOptionalDtype()); return success(); } // The convolution backward op is decomposed as follows: // inputH, inputW = input.shape[2:] // output_padding_ = [ // inputH // - 1 // + 2 * padding_[0] // - dilation_[0] * (weight.shape[2] - 1) // - (grad_output.shape[2] - 1) * stride_[0], // inputW // - 1 // + 2 * padding_[1] // - dilation_[1] * (weight.shape[3] - 1) // - (grad_output.shape[3] - 1) * stride_[1], // ] // // decomp_grad_input = torch.nn.functional.conv_transpose2d( // grad_output, // weight, // None, // stride_, // padding_, // output_padding_, // groups_, // dilation_, // ) // // input_transposed = torch.ops.aten.transpose(input, 0, 1) // grad_output_transposed = grad_output.view( // grad_output.shape[0] * grad_output.shape[1], 1, *grad_output.shape[2:] // ) // decomp_grad_weight = torch.ops.aten.convolution( // input_transposed, // grad_output_transposed, // bias=None, // stride=dilation_, // padding=padding_, // dilation=stride_, // transposed=False, // output_padding=[0, 0], // groups=input.shape[0], // ) // decomp_grad_weight = torch.narrow(decomp_grad_weight, 2, 0, weight.shape[2]) // decomp_grad_weight = torch.narrow(decomp_grad_weight, 3, 0, weight.shape[3]) // decomp_grad_weight = decomp_grad_weight.view( // input_transposed.shape[0], 
//     input_transposed.shape[1],
//     grad_output.shape[1],
//     *decomp_grad_weight.shape[2:]
// )
// decomp_grad_weight = decomp_grad_weight.movedim(0, 2)
// decomp_grad_weight = decomp_grad_weight.sum(dim=0)
//
// decomp_grad_bias = torch.sum(grad_output, dim=[0, 2, 3])
//
// Restrictions enforced below (each one is a match failure):
//   - grad_output must be ranked with rank 4 (2D convolution);
//   - padding, stride, dilation and output_padding must be lists of constant
//     ints; all dilations must be 1 and all output paddings 0;
//   - output_mask must be a list of constant bools, and a result whose mask
//     entry is false must have no users;
//   - `transposed` must be a constant false.
// All three results (grad_input, grad_weight, grad_bias) are always computed.
//
// NOTE(review): the constants are created before the checks above run, and
// several later paths `return failure()` after ops have been created --
// confirm this is acceptable for the rewrite driver in use.
namespace {
class DecomposeAtenConvolutionBackwardOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenConvolutionBackwardOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    MLIRContext *context = op.getContext();
    Value input = op.getInput();
    Value weight = op.getWeight();
    Value gradOutput = op.getGradOutput();
    std::optional maybeGradRank = getTensorRank(gradOutput);
    if (!maybeGradRank) {
      return rewriter.notifyMatchFailure(op,
                                         "expected grad output to have a rank");
    }
    unsigned gradRank = *maybeGradRank;
    if (gradRank != 4)
      return rewriter.notifyMatchFailure(
          op, "unimplemented: only 2D convolutions supported.");

    Value cstZero = rewriter.create(
        loc, rewriter.getI64IntegerAttr(0));
    Value cstOne = rewriter.create(
        loc, rewriter.getI64IntegerAttr(1));
    Value cstTwo = rewriter.create(
        loc, rewriter.getI64IntegerAttr(2));
    Value cstNone = rewriter.create(loc);
    Value cstFalse = rewriter.create(
        loc, rewriter.getBoolAttr(false));

    // `padding`/`dilation`/`stride` hold the list elements as values; the
    // `*Int` vectors hold the same lists as matched constants.
    SmallVector padding, dilation, stride;
    SmallVector paddingInt, dilationInt, strideInt,
        outputPaddingInt;

    if (!matchPattern(op.getPadding(), m_TorchListOfConstantInts(paddingInt)))
      return rewriter.notifyMatchFailure(
          op, "padding must be a list of constant ints");

    if (!matchPattern(op.getStride(), m_TorchListOfConstantInts(strideInt)))
      return rewriter.notifyMatchFailure(
          op, "stride must be a list of constant ints");

    if (!matchPattern(op.getDilation(), m_TorchListOfConstantInts(dilationInt)))
      return rewriter.notifyMatchFailure(
          op, "dilation must be a list of constant ints");
    if (!llvm::all_of(dilationInt,
                      [](int64_t dilationVal) { return dilationVal == 1; }))
      return rewriter.notifyMatchFailure(
          op, "unimplemented: only dilations of 1 supported.");

    if (!matchPattern(op.getOutputPadding(),
                      m_TorchListOfConstantInts(outputPaddingInt)))
      return rewriter.notifyMatchFailure(
          op, "output padding must be a list of constant ints");
    if (!llvm::all_of(outputPaddingInt,
                      [](int64_t outPad) { return outPad == 0; }))
      return rewriter.notifyMatchFailure(
          op, "unimplemented: only output padding of 0 supported.");

    SmallVector outMask;
    if (!matchPattern(op.getOutputMask(), m_TorchListOfConstantBools(outMask)))
      return rewriter.notifyMatchFailure(
          op, "only constant bool output_mask is supported.");
    // A masked-out result is only tolerated when nothing consumes it.
    for (unsigned i = 0; i < outMask.size(); i++) {
      if (outMask[i] == false) {
        Value result = op->getResults()[i];
        if (!result.getUsers().empty())
          return rewriter.notifyMatchFailure(
              op, "unimplemented: false value supported for output_mask only "
                  "when the result tensor corresponding to that has no users.");
      }
    }

    bool transposed;
    if (!matchPattern(op.getTransposed(), m_TorchConstantBool(&transposed)))
      return rewriter.notifyMatchFailure(
          op, "transposed arg should be a constant bool.");
    if (transposed)
      return rewriter.notifyMatchFailure(
          op, "unimplemented: transposed convolutions are not supported.");

    // The return values are ignored here.
    // NOTE(review): presumably these cannot fail once the constant-int list
    // matches above succeeded -- confirm.
    getListConstructElements(op.getPadding(), padding);
    getListConstructElements(op.getStride(), stride);
    getListConstructElements(op.getDilation(), dilation);

    // Computing Grad Input.
    // Calculate output padding for first convolution.
    // output_padding_ = [
    //     inputH - 1 + (2 * padding_[0])
    //         - (dilation_[0] * (weight.size()[2] - 1))
    //         - ((grad_out.size()[2] - 1) * stride_[0]),
    //     inputW - 1 + (2 * padding_[1])
    //         - (dilation_[1] * (weight.size()[3] - 1))
    //         - ((grad_out.size()[3] - 1) * stride_[1]),
    // ]
    SmallVector outputPaddingValues;
    // One entry per spatial dim (dims 2 .. gradRank - 1).
    for (unsigned i = 2; i < gradRank; i++) {
      Value dim = rewriter.create(
          loc, rewriter.getI64IntegerAttr(i));
      Value inputVecDim = rewriter.create(loc, input, dim);
      Value gradOutDim =
          rewriter.create(loc, gradOutput, dim);
      Value weightDim = rewriter.create(loc, weight, dim);
      Value inputVecDimMinusOne =
          rewriter.create(loc, inputVecDim, cstOne);
      Value gradOutDimMinusOne =
          rewriter.create(loc, gradOutDim, cstOne);
      Value weightDimMinusOne =
          rewriter.create(loc, weightDim, cstOne);
      Value twoTimesPadding =
          rewriter.create(loc, padding[i - 2], cstTwo);
      Value tmpA = rewriter.create(loc, weightDimMinusOne,
                                   dilation[i - 2]);
      Value tmpB = rewriter.create(loc, gradOutDimMinusOne,
                                   stride[i - 2]);
      Value outputPaddingVal = rewriter.create(
          loc, inputVecDimMinusOne, twoTimesPadding);
      outputPaddingVal =
          rewriter.create(loc, outputPaddingVal, tmpA);
      outputPaddingVal =
          rewriter.create(loc, outputPaddingVal, tmpB);
      outputPaddingValues.push_back(outputPaddingVal);
    }
    Value outputPaddingForGradInput = rewriter.create(
        loc, ListType::get(IntType::get(context)), outputPaddingValues);

    // Operand order follows the conv_transpose2d call shown in the header
    // comment of this pattern.
    Value gradInput = rewriter.create(
        loc, op.getResultTypes()[0], gradOutput, weight, cstNone,
        op.getStride(), op.getPadding(), outputPaddingForGradInput,
        op.getGroups(), op.getDilation());

    Type transposedType;
    if (failed(getTransposedType(input.getType().cast(), 0, 1,
                                 transposedType)))
      return failure();
    Value inputTransposed = rewriter.create(
        loc, transposedType, input, cstZero, cstOne);

    // For the cases where the stride is non-unit, we compute the `GradWeight`
    // through this implementation.
    Value gradWeight;
    if (!llvm::all_of(strideInt, [](int64_t stride) { return stride == 1; })) {
      // Computing Grad Weight.
      SmallVector gradOutputSize;
      for (unsigned i = 0; i < gradRank; i++) {
        gradOutputSize.push_back(rewriter.create(
            loc, gradOutput,
            rewriter.create(
                loc, rewriter.getI64IntegerAttr(i))));
      }

      // View grad_output as [shape[0] * shape[1], 1, shape[2], shape[3]].
      Value gradOutputViewDimZero = rewriter.create(
          loc, gradOutputSize[0], gradOutputSize[1]);
      Value gradOutputViewShapeList =
          rewriter.create(
              loc, Torch::ListType::get(Torch::IntType::get(op.getContext())),
              ValueRange{gradOutputViewDimZero, cstOne, gradOutputSize[2],
                         gradOutputSize[3]});

      BaseTensorType gradOutputTy = gradOutput.getType().cast();
      if (!gradOutputTy.hasSizes())
        return failure();
      SmallVector gradOutputSizesInt(gradOutputTy.getSizes());
      SmallVector gradOutputViewSizesInt(gradOutputSizesInt);
      // The static leading extent is only known when both folded dims are.
      if (gradOutputViewSizesInt[0] != kUnknownSize &&
          gradOutputViewSizesInt[1] != kUnknownSize)
        gradOutputViewSizesInt[0] *= gradOutputViewSizesInt[1];
      else
        gradOutputViewSizesInt[0] = kUnknownSize;
      gradOutputViewSizesInt[1] = 1;
      BaseTensorType gradOutputTypeForView =
          gradOutputTy
              .getWithSizesAndDtype(llvm::ArrayRef(gradOutputViewSizesInt),
                                    gradOutputTy.getOptionalDtype())
              .cast();
      Value gradOutputView = rewriter.create(
          loc, gradOutputTypeForView, gradOutput, gradOutputViewShapeList);

      BaseTensorType inputTransposedTy =
          inputTransposed.getType().cast();
      if (!inputTransposedTy.hasSizes())
        return failure();
      SmallVector inputTransposedSizesInt(
          inputTransposedTy.getSizes());
      SmallVector gradWeightSizesInt{inputTransposedSizesInt[0],
                                     gradOutputViewSizesInt[0]};
      // Static spatial extents of the convolution result, when computable.
      for (unsigned i = 2; i < gradRank; i++) {
        if (inputTransposedSizesInt[i] != kUnknownSize &&
            gradOutputViewSizesInt[i] != kUnknownSize) {
          int64_t kernelSizeInt =
              strideInt[i - 2] * (gradOutputViewSizesInt[i] - 1) + 1;
          gradWeightSizesInt.push_back(
              ((inputTransposedSizesInt[i] + (paddingInt[i - 2] * 2) -
                kernelSizeInt) /
               dilationInt[i - 2]) +
              1);
        } else {
          gradWeightSizesInt.push_back(kUnknownSize);
        }
      }

      BaseTensorType gradWeightTy =
          inputTransposedTy
              .getWithSizesAndDtype(llvm::ArrayRef(gradWeightSizesInt),
                                    inputTransposedTy.getOptionalDtype())
              .cast();

      // Stride and dilation operands are deliberately swapped here, and the
      // group count is the size of input dim 0.
      Value numGroup = rewriter.create(loc, input, cstZero);
      gradWeight = rewriter.create(
          loc, gradWeightTy, inputTransposed, gradOutputView, cstNone,
          /*stride=*/op.getDilation(), op.getPadding(),
          /*dilation=*/op.getStride(), op.getTransposed(),
          op.getOutputPadding(), numGroup);

      BaseTensorType weightTy = weight.getType().cast();
      if (!weightTy.hasSizes())
        return failure();
      SmallVector weightSizes(weightTy.getSizes());
      // Narrow each spatial dim of the result down to the weight's extent.
      for (unsigned i = 0; i < gradWeightTy.getSizes().size() - 2; i++) {
        gradWeightSizesInt[i + 2] = weightSizes[i + 2];
        BaseTensorType gradWeightNarrowTy =
            gradWeightTy
                .getWithSizesAndDtype(llvm::ArrayRef(gradWeightSizesInt),
                                      gradWeightTy.getOptionalDtype())
                .cast();

        Value dim = rewriter.create(
            loc, rewriter.getI64IntegerAttr(i + 2));
        Value length = rewriter.create(loc, weight, dim);
        gradWeight = rewriter.create(
            loc, gradWeightNarrowTy, gradWeight, dim, /*start=*/cstZero,
            length);
      }

      SmallVector gradWeightViewShapeInt{
          inputTransposedSizesInt[0], inputTransposedSizesInt[1]};
      gradWeightViewShapeInt.push_back(gradOutputSizesInt[1]);
      gradWeightViewShapeInt.insert(
          gradWeightViewShapeInt.end(),
          {gradWeightSizesInt[2], gradWeightSizesInt[3]});

      // NOTE(review): the view shape is materialised from the static ints
      // above, so any kUnknownSize entry would be emitted as a constant --
      // confirm dynamic shapes cannot reach this point.
      SmallVector gradWeightViewShapeValue;
      for (unsigned i = 0; i < gradWeightViewShapeInt.size(); i++) {
        gradWeightViewShapeValue.push_back(
            rewriter.create(
                loc, rewriter.getI64IntegerAttr(gradWeightViewShapeInt[i])));
      }

      Value gradWeightViewShapeList =
          rewriter.create(
              loc, Torch::ListType::get(Torch::IntType::get(op.getContext())),
              gradWeightViewShapeValue);

      BaseTensorType gradWeightTypeForView =
          gradWeightTy
              .getWithSizesAndDtype(llvm::ArrayRef(gradWeightViewShapeInt),
                                    gradWeightTy.getOptionalDtype())
              .cast();
      gradWeight = rewriter.create(
          loc, gradWeightTypeForView, gradWeight, gradWeightViewShapeList);

      gradWeightTy = gradWeight.getType().cast();
      // Shape after moving dim 0 to position 2.
      SmallVector gradWeightDimsOrder =
          computeDimsOrderForMoveDim(0, 2, gradWeightViewShapeInt.size());
      SmallVector gradWeightMoveDimShape;
      for (unsigned i = 0; i < gradWeightDimsOrder.size(); i++) {
        gradWeightMoveDimShape.push_back(
            gradWeightViewShapeInt[gradWeightDimsOrder[i]]);
      }
      BaseTensorType gradWeightTypeForMoveDim =
          gradWeightTy
              .getWithSizesAndDtype(llvm::ArrayRef(gradWeightMoveDimShape),
                                    gradWeightTy.getOptionalDtype())
              .cast();

      gradWeight = rewriter.create(
          loc, gradWeightTypeForMoveDim, gradWeight, /*source=*/cstZero,
          /*destination=*/cstTwo);

      // Final reduction over dim 0 yields the grad_weight result type.
      Value gradIntList = rewriter.create(
          loc, Torch::ListType::get(Torch::IntType::get(op.getContext())),
          llvm::ArrayRef{cstZero});
      gradWeight = rewriter.create(
          loc, op.getResultTypes()[1], /*self=*/gradWeight, /*dim=*/gradIntList,
          /*keepdim=*/cstFalse,
          /*dtype=*/cstNone);
    } else {
      // Unit-stride case: transpose grad_output, convolve the transposed
      // input with it, then transpose dims 0 and 1 of the result back.
      if (failed(getTransposedType(gradOutput.getType().cast(),
                                   0, 1, transposedType)))
        return failure();
      Value gradOutputTransposed = rewriter.create(
          loc, transposedType, gradOutput, cstZero, cstOne);
      // Convolve input with grad_output.
      if (failed(
              getTransposedType(op.getResultTypes()[1].cast(),
                                0, 1, transposedType)))
        return failure();
      gradWeight = rewriter.create(
          loc, transposedType, inputTransposed, gradOutputTransposed, cstNone,
          op.getStride(), op.getPadding(), op.getDilation(), op.getTransposed(),
          op.getOutputPadding(), op.getGroups());
      gradWeight = rewriter.create(
          loc, op.getResultTypes()[1], gradWeight, cstZero, cstOne);
    }

    // Computing Grad Bias.
    SmallVector dimIntList{cstZero};
    for (unsigned i = 2; i < gradRank; i++)
      dimIntList.push_back(rewriter.create(
          loc, rewriter.getI64IntegerAttr(i)));
    Value gradIntList = rewriter.create(
        loc, Torch::ListType::get(Torch::IntType::get(op.getContext())),
        dimIntList);

    // Sum grad_output over the dims in `dimIntList` (dim 0 and dims
    // 2 .. gradRank - 1), i.e. over every dim except dim 1.
    Value gradBias = rewriter.create(
        loc, op.getResultTypes()[2], gradOutput, gradIntList, cstFalse,
        cstNone);

    rewriter.replaceOp(op, {gradInput, gradWeight, gradBias});
    return success();
  }
};
} // namespace

// Decompose aten.addmm into aten.mm and aten.add.Tensor op.
// result = (self * beta) + alpha * (mat1 @ mat2)
//
// Match failures: `mat1` or `mat2` is unranked or not rank 2, or `self` has
// no dtype / a non-floating-point dtype.
namespace {
class DecomposeAtenAddmmOp : public OpRewritePattern {
public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenAddmmOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    Value input = op.getSelf();
    Value mat1 = op.getMat1();
    Value mat2 = op.getMat2();
    std::optional mat1Rank = getTensorRank(mat1);
    std::optional mat2Rank = getTensorRank(mat2);

    // The operands `mat1`, `mat2` to aten.addmm must be of rank 2.
    if (!mat1Rank || !mat2Rank || *mat1Rank != 2 || *mat2Rank != 2) {
      return rewriter.notifyMatchFailure(
          op, "expected mat1, mat2 operands to aten.addmm to be rank 2");
    }

    // TODO: Handle integer type operands.
    auto inputType = input.getType().cast();
    if (!inputType.hasDtype() || !inputType.getDtype().isa()) {
      return rewriter.notifyMatchFailure(
          op, "unimplemented: non-floating point dtype");
    }

    // matrix multiplication: matmul = mat1 @ mat2
    // (typed with the op's result type)
    Value matmul = rewriter.create(loc, op.getType(), mat1, mat2);
    // scaledInput = self * beta
    // (typed with `self`'s own type, which may differ from the result type)
    Value scaledInput = rewriter.create(loc, input.getType(),
                                        input, op.getBeta());
    // result = scaledInput + alpha * matmul
    rewriter.replaceOpWithNewOp(op, op.getType(), scaledInput,
                                matmul, op.getAlpha());
    return success();
  }
};
} // namespace

// Decompose aten.mean into: sum(x) / numTensorElements.
namespace { class DecomposeAtenMeanOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMeanOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getSelf(); Value output = op.getResult(); BaseTensorType outputTensorType = output.getType().cast(); Value sum = rewriter.create(loc, outputTensorType, input, op.getDtype()); Value numTensorElements = rewriter.create(loc, input); rewriter.replaceOpWithNewOp(op, outputTensorType, sum, numTensorElements); return success(); } }; } // namespace // productDimSize = product(size(dim) for dim in dims) // aten.mean(x, dims) = aten.sum(x, dims) / productDimSize. namespace { class DecomposeAtenMeanDimOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenMeanDimOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getSelf(); std::optional maybeInputRank = getTensorRank(input); if (!maybeInputRank) { return rewriter.notifyMatchFailure(op, "expected input to have a rank"); } unsigned inputRank = *maybeInputRank; Value dimList = op.getDim(); Value keepDim = op.getKeepdim(); Value dtype = op.getDtype(); Type outputType = op.getType(); MLIRContext *context = op.getContext(); BaseTensorType inputType = input.getType().cast(); if (!inputType.hasDtype() || !inputType.getDtype().isa() || !isNoneOrFloatDtype(context, dtype)) { return rewriter.notifyMatchFailure( op, "only floating-point type is supported"); } SmallVector dimListElements; if (!getListConstructElements(dimList, dimListElements) && !dimList.getType().isa()) { return rewriter.notifyMatchFailure( op, "expected `dim` to be `None` or constructed from list construct"); } // Compute sum along dimensions specified in `dimList`. Value sumAlongDims = rewriter.create( loc, outputType, input, dimList, keepDim, dtype); // `productDimSize` is product of sizes of dimensions to be reduced. 
Value productDimSize; // Case: Reduce along all dims. if (dimListElements.empty() && inputRank != 0) { productDimSize = rewriter.create(loc, input); } else { productDimSize = rewriter.create( loc, rewriter.getI64IntegerAttr(1)); for (Value dim : dimListElements) { Value dimSize = rewriter.create(loc, input, dim); productDimSize = rewriter.create(loc, productDimSize, dimSize); } } rewriter.replaceOpWithNewOp(op, outputType, sumAlongDims, productDimSize); return success(); } }; } // namespace namespace { class DecomposeAtenSquareOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSquareOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); rewriter.replaceOpWithNewOp(op, op.getType(), self, self); return success(); } }; } // namespace // Silu(x) = sigmoid(x) * x namespace { class DecomposeAtenSiluOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenSiluOp op, PatternRewriter &rewriter) const override { Value self = op.getSelf(); Value sigmoid = rewriter.create(op.getLoc(), op.getType(), self); rewriter.replaceOpWithNewOp(op, op.getType(), sigmoid, self); return success(); } }; } // namespace // pDash = 1.0 - p // boolMask = aten.rand_like(input) < pDash // dropout(input, p, train=True) = (boolMask * input) / pDash // dropout(input, p, train=False) = input namespace { class DecomposeAtenDropoutOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenDropoutOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); Value input = op.getInput(); Value prob = op.getP(); bool train = false; if (!matchPattern(op.getTrain(), m_TorchConstantBool(&train))) return rewriter.notifyMatchFailure(op, "train must be a boolean constant"); if (!train) { rewriter.replaceOp(op, input); return success(); } BaseTensorType inputType = input.getType().cast(); if 
(!inputType.hasDtype() || !inputType.getDtype().isa()) return rewriter.notifyMatchFailure( op, "only support floating type input for training mode"); Value noneVal = rewriter.create(loc); Value floatOne = rewriter.create(loc, rewriter.getF64FloatAttr(1.0)); Value oneMinusP = rewriter.create(loc, floatOne, prob); Value boolMask = rewriter.create( loc, inputType, input, oneMinusP, /*generator=*/noneVal); Value maskedInput = rewriter.create(loc, inputType, boolMask, input); rewriter.replaceOpWithNewOp(op, op.getType(), maskedInput, oneMinusP); return success(); } }; class DeomposeAtenNativeDropoutOp : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(AtenNativeDropoutOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); MLIRContext *context = op->getContext(); Value input = op.getInput(); Value prob = op.getP(); bool train = false; if (!op.getTrain().getType().isa()) { if (!matchPattern(op.getTrain(), m_TorchConstantBool(&train))) { return rewriter.notifyMatchFailure( op, "train must be a boolean constant or none"); } } Value noneVal = rewriter.create(loc); if (!train) { Value i1Type = getDtypeIntValueForType(rewriter, loc, IntegerType::get(context, 1)); Value inputSize = rewriter.create( loc, Torch::ListType::get(Torch::IntType::get(context)), input); Value trueValue = rewriter.create(loc, 1); Value trueMask = rewriter.create( loc, op->getResultTypes()[1], inputSize, trueValue, i1Type, /*layout=*/noneVal, /*device=*/noneVal, /*pin_memory=*/noneVal); rewriter.replaceOp(op, ArrayRef{input, trueMask}); return success(); } BaseTensorType inputType = input.getType().cast(); if (!inputType.hasDtype() || !inputType.getDtype().isa()) { return rewriter.notifyMatchFailure( op, "only support floating type input for training mode"); } Value floatOne = rewriter.create(loc, rewriter.getF64FloatAttr(1.0)); Value oneMinusP = rewriter.create(loc, floatOne, prob); Value boolMask = rewriter.create( loc, 
inputType, input, oneMinusP, /*generator=*/noneVal); Value maskedInput = rewriter.create(loc, inputType, boolMask, input); Value output = rewriter.create( loc, op->getResultTypes()[0], maskedInput, oneMinusP); rewriter.replaceOp( op, ArrayRef{ output, convertTensorToDtype(rewriter, loc, boolMask, IntegerType::get(context, 1))}); return success(); } }; } // namespace // Decompose aten.var into: aten.var.dim op. namespace { class DecomposeAtenVarOp : public OpRewritePattern