//===------------------------------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//

#include "torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h"
#include "torch-mlir/Conversion/TorchOnnxToTorch/Utils.h"
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::onnx_c;

// Simple rewrites for the default domain.
// See: https://onnx.ai/onnx/operators/
// For operators that are effectively version invariant, we register with
// sinceVersion==1. We interpret this to include the following spec
// diffs that are irrelevant to this level of lowering:
//   * Supported element types.
//   * Limited broadcasting to full broadcasting support.
//
// There are a lot of spec revisions that basically generalized elementwise
// to be more normal and a direct translation vs a special case. This
// results in a lot of ONNX test cases that all reduce to the exact same
// thing here, so we simplify.

// utilities
namespace {
// In case the ReduceSum op was not the first operation performed on the data,
// we provide the original operand through storeResult, which will be modified
// if the result is passed on to another operation, and will be used for
// noop_with_empty_axes handling before that.
LogicalResult reducedSumImpl(OpBinder binder,
                             ConversionPatternRewriter &rewriter, Value data,
                             Torch::ValueTensorType resultType,
                             Value &storeResult, int64_t keepDims,
                             int64_t noop_with_empty_axes,
                             bool isIntermediateOp) {

  SmallVector<Value> axesList;
  Value axesVal;
  if (!binder.tensorOperandAtIndex(axesVal, 1)) {
    auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
    if (!inputType.hasSizes() || !resultType.hasSizes()) {
      return rewriter.notifyMatchFailure(
          binder.op, "unimplemented: expected input and result to have shapes");
    }

    if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
      SmallVector<int64_t> inputShape{inputType.getSizes()};
      SmallVector<int64_t> resultShape{resultType.getSizes()};
      // If the shapes are equal, none of the dims is reduced.
      if (llvm::equal(inputShape, resultShape)) {
        // Simply fill in the op and return.
        rewriter.replaceOp(binder.op, data);
        return success();
      }
      if (areAllElementsDistinct(inputShape)) {
        // The check for the input shape elements to be distinct is added
        // for cases like:
        //   Input: [3, 2, 2] -> Output: [3, 2]
        // For the above case, the input and output shapes alone cannot tell
        // us whether dim 1 or dim 2 was reduced, so this shape-based
        // inference is only used when every input dim size is distinct.
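        // The walk below matches input dims against result dims greedily;
        // e.g. input [2, 3, 5] vs. result [2, 5] records reduceDims = {1},
        // and with keepdims=1 the size-1 result entry is consumed right
        // after the reduced dim is recorded.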
        SmallVector<int64_t> reduceDims;
        unsigned resultShapeCounter = 0;
        for (unsigned i = 0; i < inputShape.size(); i++) {
          if (resultShapeCounter < resultShape.size() &&
              inputShape[i] == resultShape[resultShapeCounter]) {
            resultShapeCounter++;
          } else {
            reduceDims.push_back(i);
            if (resultShapeCounter < resultShape.size() &&
                resultShape[resultShapeCounter] == 1)
              resultShapeCounter++;
          }
        }
        for (auto i : reduceDims) {
          axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(i)));
        }
      }
    }
    if (axesList.empty()) {
      Torch::BaseTensorType axesType =
          cast<Torch::BaseTensorType>(axesVal.getType());
      auto axesTy = dyn_cast<Torch::ValueTensorType>(axesVal.getType());
      auto axesShape = axesTy.getSizes();
      if (axesShape.size() != 1 || axesShape[0] == Torch::kUnknownSize)
        return failure();

      Value zero = rewriter.create<Torch::ConstantIntOp>(
          binder.getLoc(), rewriter.getType<Torch::IntType>(),
          rewriter.getI64IntegerAttr(0));
      SmallVector<int64_t> selectSizes{1};
      auto selType = rewriter.getType<Torch::ValueTensorType>(
          selectSizes, axesType.getOptionalDtype());
      int64_t numAxes = axesShape[0];
      for (int64_t i = 0; i < numAxes; ++i) {
        Value iv = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(i));
        Value extract = rewriter.create<Torch::AtenSelectIntOp>(
            binder.getLoc(), selType, axesVal, zero, iv);
        Value dim = rewriter.create<Torch::AtenItemOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
        axesList.push_back(dim);
      }
    }
  }

  SmallVector<int64_t> axesInts;
  if (!binder.s64IntegerArrayAttr(axesInts, "axes", {})) {
    for (int64_t i = 0, s = axesInts.size(); i < s; ++i) {
      Value iv = rewriter.create<Torch::ConstantIntOp>(
          binder.getLoc(), rewriter.getType<Torch::IntType>(),
          rewriter.getI64IntegerAttr(axesInts[i]));
      axesList.push_back(iv);
    }
  }

  // Do not include absolute value in the noop.
  if (axesList.empty() && noop_with_empty_axes) {
    rewriter.replaceOp(binder.op, storeResult);
    return success();
  }

  Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
      binder.getLoc(),
      Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
      axesList);
  Value keepDimBool =
      rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
  Value dType = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
  // If we are using the ReduceSum as an intermediate op to be passed into
  // another operation, we might not want to replace the op. So we create a
  // new op and store the result in a variable.
  if (!isIntermediateOp) {
    rewriter.replaceOpWithNewOp<Torch::AtenSumDimIntListOp>(
        binder.op, resultType, data, dimValueList, keepDimBool,
        /*dtype=*/dType);
  } else {
    storeResult = rewriter.create<Torch::AtenSumDimIntListOp>(
        binder.getLoc(), resultType, data, dimValueList, keepDimBool,
        /*dtype=*/dType);
  }
  return success();
}

Value getValueList(OpBinder binder, ConversionPatternRewriter &rewriter,
                   Value operand) {
  SmallVector<Value> itemList;
  auto sizes = dyn_cast<Torch::ValueTensorType>(operand.getType()).getSizes();
  Torch::BaseTensorType operandType =
      cast<Torch::BaseTensorType>(operand.getType());

  SmallVector<int64_t> selectSizes;
  selectSizes.push_back(1);
  Type selectResultType = operandType.getWithSizesAndDtype(
      llvm::ArrayRef(selectSizes), operandType.getOptionalDtype());

  auto extract = [&rewriter, &binder](Value x, Value v) {
    auto xTy = cast<Torch::ValueTensorType>(x.getType());
    Type extractTy = rewriter.getType<Torch::FloatType>();
    if (isa<IntegerType>(xTy.getDtype()))
      extractTy = rewriter.getType<Torch::IntType>();
    return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy, v);
  };

  Value zero = rewriter.create<Torch::ConstantIntOp>(
      binder.getLoc(), rewriter.getType<Torch::IntType>(),
      rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));

  MLIRContext *context = binder.op->getContext();
  for (int i = 2; i < sizes[0]; i++) {
    Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
        binder.getLoc(), rewriter.getType<Torch::IntType>(),
        rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
    Value ext = rewriter.create<Torch::AtenSelectIntOp>(
        binder.getLoc(), selectResultType, operand, zero, selectIndex);
    Value item = extract(operand, ext);
    itemList.push_back(item);
  }
  auto xTy = cast<Torch::ValueTensorType>(operand.getType());
  Value ValueList;
  if (isa<IntegerType>(xTy.getDtype())) {
    ValueList = rewriter.create<Torch::PrimListConstructOp>(
        binder.getLoc(), Torch::ListType::get(Torch::IntType::get(context)),
        itemList);
  } else {
    ValueList = rewriter.create<Torch::PrimListConstructOp>(
        binder.getLoc(), Torch::ListType::get(Torch::FloatType::get(context)),
        itemList);
  }
  return ValueList;
}
} // namespace

void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
    OnnxCustomOpConversionPattern &patterns) {
  patterns.onOp(
      "QuantizeLinear", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        llvm::SmallVector<Value> operands;
        if (binder.tensorOperands(operands, 3) ||
            binder.tensorResultType(resultType))
          return failure();

        auto loc = binder.getLoc();
        Value operand = operands[0];
        Value scale = operands[1];
        Value zeropoint = operands[2];

        auto scaleTy = dyn_cast<Torch::ValueTensorType>(scale.getType());
        if (!scaleTy || !scaleTy.hasSizes())
          return rewriter.notifyMatchFailure(binder.op, "requires known rank");
        if (!resultType.hasDtype())
          return rewriter.notifyMatchFailure(binder.op,
                                             "requires known result dtype");
        auto resultETy = resultType.getDtype();

        bool rank0 = scaleTy.getSizes().size() == 0;
        bool length1 =
            scaleTy.getSizes().size() == 1 && scaleTy.getSizes()[0] == 1;

        if (!rank0 && !length1)
          return rewriter.notifyMatchFailure(binder.op,
                                             "unimplemented: non-scalar scale");

        auto qTensorTy = getQTorchTypeFromTorchIntType(resultType);
        if (!qTensorTy) {
          return rewriter.notifyMatchFailure(binder.op,
                                             "unsupported result dtype");
        }

        auto torchqTy = Torch::getScalarTypeForType(qTensorTy.getDtype());

        Value tyConst = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64),
                                    static_cast<int64_t>(torchqTy)));

        scale = rewriter.create<Torch::AtenItemOp>(
            loc, rewriter.getType<Torch::FloatType>(), scale);

        bool fpResult = isa<FloatType>(resultETy);
        Type zeropointTy = rewriter.getType<Torch::IntType>();
        if (fpResult)
          zeropointTy = rewriter.getType<Torch::FloatType>();
        zeropoint =
            rewriter.create<Torch::AtenItemOp>(loc, zeropointTy, zeropoint);

        if (fpResult) {
          Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
          Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(loc, false);
          Value one = rewriter.create<Torch::ConstantFloatOp>(
              loc, rewriter.getF64FloatAttr(1.0));
          Value div = rewriter.create<Torch::AtenDivScalarOp>(
              loc, operand.getType(),
              operand, scale);
          Value add = rewriter.create<Torch::AtenAddScalarOp>(
              loc, operand.getType(), div, zeropoint, one);
          rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
              binder.op, resultType, add, tyConst,
              /*non_blocking=*/cstFalse, /*copy=*/cstFalse,
              /*memory_format=*/none);
          return success();
        }

        auto quantize = rewriter.create<Torch::AtenQuantizePerTensorOp>(
            loc, qTensorTy, operand, scale, zeropoint, tyConst);
        rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
                                                          quantize);
        return success();
      });
  patterns.onOp(
      "QLinearConv", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        llvm::SmallVector<Value> operands;
        if ((binder.tensorOperands(operands, 8) &&
             binder.tensorOperands(operands, 9)) ||
            binder.tensorResultType(resultType))
          return failure();
        Value a = operands[0];
        Value aScale = operands[1];
        Value aZp = operands[2];
        Value b = operands[3];
        Value bScale = operands[4];
        Value bZp = operands[5];
        Value cScale = operands[6];
        Value cZp = operands[7];
        Value c = operands.size() == 9 ? operands[8] : nullptr;

        auto check = [](Value v) {
          auto vTy = cast<Torch::ValueTensorType>(v.getType());
          return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1; });
        };
        if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
            !check(cScale) || !check(cZp))
          return rewriter.notifyMatchFailure(
              binder.op, "not supported for non per-tensor quantization");

        auto extract = [&rewriter, &binder](Value v) {
          auto vTy = cast<Torch::ValueTensorType>(v.getType());
          Type extractTy = rewriter.getType<Torch::FloatType>();
          if (isa<IntegerType>(vTy.getDtype()))
            extractTy = rewriter.getType<Torch::IntType>();

          return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy,
                                                    v);
        };

        aZp = extract(aZp);
        bZp = extract(bZp);
        cZp = extract(cZp);
        aScale = extract(aScale);
        bScale = extract(bScale);
        cScale = extract(cScale);

        auto make = [&rewriter, &binder](Value v, Value scale,
                                         Value zp) -> Value {
          auto ty = cast<Torch::ValueTensorType>(v.getType());
          auto newTy = getQTorchTypeFromTorchIntType(ty);
          return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
              binder.getLoc(), newTy, v, scale, zp);
        };

        a = make(a, aScale, aZp);
        b = make(b, bScale, bZp);

        auto cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(),
            rewriter.getIntegerType(32, /*isSigned=*/true));

        // TODO(suderman): insert convolution operator.
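        // Rather than lowering the convolution here, re-emit an "onnx.Conv"
        // torch.operator over the quantized inputs (copying every attribute
        // except the name) so the existing Conv pattern handles strides,
        // pads, and dilations; the i32 accumulator it yields is then
        // requantized below with scale = aScale * bScale and zero point 0.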
        llvm::SmallVector<Value> newOperands = {a, b};
        if (c)
          newOperands.push_back(c);

        cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(),
            rewriter.getType<Torch::QInt32Type>());

        llvm::SmallVector<NamedAttribute> newAttributes;
        newAttributes.push_back(
            rewriter.getNamedAttr("name", rewriter.getStringAttr("onnx.Conv")));
        for (auto namedAttr : binder.op->getAttrDictionary()) {
          if (namedAttr.getName().getValue().compare("name") == 0)
            continue;
          newAttributes.push_back(namedAttr);
        }

        c = rewriter
                .create<Torch::OperatorOp>(binder.getLoc(), cTy, newOperands,
                                           newAttributes,
                                           binder.op->getRegions().size())
                .getResult(0);

        Value outScale = rewriter.create<Torch::AtenMulFloatOp>(
            binder.getLoc(), rewriter.getType<Torch::FloatType>(), aScale,
            bScale);
        Value outZp = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
        c = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
            binder.getLoc(), cTy, c, outScale, outZp);
        cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(), rewriter.getF32Type());

        c = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(), cTy,
                                                         c);
        cTy = getQTorchTypeFromTorchIntType(resultType);
        Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(
                rewriter.getIntegerType(64),
                static_cast<int64_t>(
                    Torch::getScalarTypeForType(cTy.getDtype()))));
        c = rewriter.create<Torch::AtenQuantizePerTensorOp>(
            binder.getLoc(), cTy, c, cScale, cZp, dtyVal);
        rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
                                                          c);
        return success();
      });
  patterns.onOp(
      "QLinearMatMul", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        llvm::SmallVector<Value> operands;
        if (binder.tensorOperands(operands, 8) ||
            binder.tensorResultType(resultType))
          return failure();
        Value a = operands[0];
        Value aScale = operands[1];
        Value aZp = operands[2];
        Value b = operands[3];
        Value bScale = operands[4];
        Value bZp = operands[5];
        Value cScale = operands[6];
        Value cZp = operands[7];

        auto check = [](Value v) {
          auto vTy = cast<Torch::ValueTensorType>(v.getType());
          for (auto dim : vTy.getSizes())
            if (dim != 1)
              return false;
          return true;
        };
        if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
            !check(cScale) || !check(cZp))
          return rewriter.notifyMatchFailure(
              binder.op, "not supported for non per-tensor quantization");

        Value emptyList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(),
            rewriter.getType<Torch::ListType>(
                rewriter.getType<Torch::IntType>()),
            ValueRange{});
        auto extract = [&rewriter, &binder, &emptyList](Value v) {
          auto vTy = cast<Torch::ValueTensorType>(v.getType());
          if (!vTy.getSizes().empty()) {
            vTy = rewriter.getType<Torch::ValueTensorType>(
                ArrayRef<int64_t>({}), vTy.getOptionalDtype());
            v = rewriter.create<Torch::AtenReshapeOp>(binder.getLoc(), vTy, v,
                                                      emptyList);
          }

          Type extractTy = rewriter.getType<Torch::FloatType>();
          if (isa<IntegerType>(vTy.getDtype()))
            extractTy = rewriter.getType<Torch::IntType>();

          return rewriter.create<Torch::AtenItemOp>(binder.getLoc(), extractTy,
                                                    v);
        };

        aZp = extract(aZp);
        bZp = extract(bZp);
        cZp = extract(cZp);
        aScale = extract(aScale);
        bScale = extract(bScale);
        cScale = extract(cScale);

        auto make = [&rewriter, &binder](Value v, Value scale,
                                         Value zp) -> Value {
          auto ty = cast<Torch::ValueTensorType>(v.getType());
          auto newTy = getQTorchTypeFromTorchIntType(ty);
          return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
              binder.getLoc(), newTy, v, scale, zp);
        };

        a = make(a, aScale, aZp);
        b = make(b, bScale, bZp);

        auto cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(),
            rewriter.getIntegerType(32, /*isSigned=*/true));

        Value c;
        if (cTy.getSizes().size() == 2) {
          c = rewriter.create<Torch::AtenMmOp>(binder.getLoc(), cTy, a, b);
        } else {
          c = rewriter.create<Torch::AtenBmmOp>(binder.getLoc(), cTy, a, b);
        }

        cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(),
            rewriter.getType<Torch::QInt32Type>());

        Value mmScale = rewriter.create<Torch::AtenMulFloatOp>(
            binder.getLoc(), rewriter.getType<Torch::FloatType>(), aScale,
            bScale);
        Value mmZp = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(),
            rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
        c = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
            binder.getLoc(), cTy, c, mmScale, mmZp);
        cTy = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(), rewriter.getF32Type());

        c = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(), cTy,
                                                         c);
        cTy = dyn_cast<Torch::ValueTensorType>(
            getQTorchTypeFromTorchIntType(resultType));
        Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(
                rewriter.getIntegerType(64),
                static_cast<int64_t>(
                    Torch::getScalarTypeForType(cTy.getDtype()))));
        c = rewriter.create<Torch::AtenQuantizePerTensorOp>(
            binder.getLoc(), cTy, c, cScale, cZp, dtyVal);
        rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
                                                          c);
        return success();
      });
  patterns.onOp("Reciprocal", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenReciprocalOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp(
      "Relu", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value x;
        if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
          return failure();
        rewriter.replaceOpWithNewOp<Torch::AtenReluOp>(binder.op, resultType,
                                                       x);
        return success();
      });
  patterns.onOp("Round", 11,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenRoundOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp("RNN", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  return OnnxRnnExpander(binder, rewriter);
                });
  patterns.onOp(
      "Scatter", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        int64_t axis;
        if (binder.s64IntegerAttr(axis, "axis", {}))
          return rewriter.notifyMatchFailure(binder.op, "axis bind failure");

        Torch::ValueTensorType resultTy;
        Value data, indices, updates;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorOperandAtIndex(indices, 1) ||
            binder.tensorOperandAtIndex(updates, 2) ||
            binder.tensorResultType(resultTy))
          return failure();

        auto dataTy = cast<Torch::ValueTensorType>(data.getType()),
             indicesTy = cast<Torch::ValueTensorType>(indices.getType()),
             updatesTy = cast<Torch::ValueTensorType>(updates.getType());

        int64_t dataRank = dataTy.getSizes().size(),
                indicesRank = indicesTy.getSizes().size(),
                updatesRank = updatesTy.getSizes().size();

        if ((dataRank < 1) || (indicesRank < 1) || (updatesRank < 1) ||
            (axis < -dataRank) || (axis >= dataRank))
          return failure();

        Value axisValue = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(axis));

        rewriter.replaceOpWithNewOp<Torch::AtenScatterSrcOp>(
            binder.op, resultTy, data, axisValue, indices, updates);

        return success();
      });
  patterns.onOp(
      "ScatterElements", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        SmallVector<Value> valList;
        int64_t axis;
        std::string reduction;
        int64_t numOperands = binder.op->getNumOperands();
        if (binder.tensorOperands(valList, numOperands) ||
            binder.s64IntegerAttr(axis, "axis", 0) ||
            binder.customOpNameStringAttr(reduction, "reduction", "none") ||
            binder.tensorResultType(resultType))
          return failure();

        auto loc = binder.getLoc();
        Value data = valList[0];
        Value indices = valList[1];
        Value updates = valList[2];

        // ONNX allows negative axis.
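        // e.g. axis = -1 on a rank-3 data tensor normalizes to axis = 2.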
        if (axis < 0)
          axis +=
              cast<Torch::ValueTensorType>(data.getType()).getSizes().size();

        Value constAxis = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), axis));

        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(0));
        Value one = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(1));

        Value axisSize = rewriter.create<Torch::AtenSizeIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), data,
            constAxis);

        auto indicesTy = cast<Torch::ValueTensorType>(indices.getType());
        Value indicesAdd = rewriter.create<Torch::AtenAddScalarOp>(
            loc, indicesTy, indices, axisSize, one);

        Value inputNeg = rewriter.create<Torch::AtenLtScalarOp>(
            loc,
            rewriter.getType<Torch::ValueTensorType>(indicesTy.getSizes(),
                                                     rewriter.getI1Type()),
            indices, zero);

        indices = rewriter.create<Torch::AtenWhereSelfOp>(
            loc, indicesTy, inputNeg, indicesAdd, indices);

        if (reduction == "none") {
          rewriter.replaceOpWithNewOp<Torch::AtenScatterSrcOp>(
              binder.op, resultType, data, constAxis, indices, updates);
          return success();
        }

        // TODO: Implement max and min cases.
        if (reduction == "mul") {
          reduction = "multiply";
        } else if (reduction == "max" || reduction == "min") {
          return rewriter.notifyMatchFailure(
              binder.op, "max/min reduction unsupported for scatter elements");
        }

        Value cstStrReduction =
            rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), reduction);

        rewriter.replaceOpWithNewOp<Torch::AtenScatterReduceOp>(
            binder.op, resultType, data, constAxis, indices, updates,
            cstStrReduction);
        return success();
      });
  patterns.onOp(
      "SequenceConstruct", 11,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        SmallVector<Value> operands;
        Torch::ListType resultType;

        if (binder.tensorOperands(operands, binder.getNumOperands()) ||
            binder.tensorListResultType(resultType))
          return failure();

        rewriter.replaceOpWithNewOp<Torch::PrimListConstructOp>(
            binder.op, resultType, operands);
        return success();
      });
  patterns.onOp(
      "SequenceLength", 11,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // onnx.SequenceLength takes a sequence (list) of tensors and returns
        // a zero-rank tensor with the length.
        Torch::ValueTensorType resultType;
        Value x;
        if (binder.tensorListOperand(x) || binder.tensorResultType(resultType))
          return failure();

        Value cstFalse =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
        Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());

        Value len = rewriter.create<Torch::AtenLenTOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), x);

        // AtenLenTOp returns a torch.int, so we have to
        // put that in a tensor.
        rewriter.replaceOpWithNewOp<Torch::AtenTensorIntOp>(
            binder.op, resultType, len, none, none, cstFalse);
        return success();
      });
  patterns.onOp(
      "Sigmoid", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value x;
        if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
          return failure();
        rewriter.replaceOpWithNewOp<Torch::AtenSigmoidOp>(binder.op,
                                                          resultType, x);
        return success();
      });
  patterns.onOp("Sin", 7,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenSinOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp("Tanh", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenTanhOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp("Sqrt", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenSqrtOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp(
      "Sub", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value x;
        Value y;
        if (binder.tensorOperands(x, y) || binder.tensorResultType(resultType))
          return failure();
        Value const1 = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 1));
        rewriter.replaceOpWithNewOp<Torch::AtenSubTensorOp>(
            binder.op, resultType, x, y, const1);
        return success();
      });
  patterns.onOp(
      "Sum", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        if (binder.op->getNumOperands() == 1) {
          Torch::ValueTensorType resultType;
          Value x;
          if (binder.tensorOperand(x) || binder.tensorResultType(resultType))
            return failure();
          rewriter.replaceOp(binder.op, x);
          return success();
        }
        Torch::ValueTensorType resultType;
        SmallVector<Value> valList;
        int64_t numOperands = binder.op->getNumOperands();
        if (binder.tensorOperands(valList, numOperands) ||
            binder.tensorResultType(resultType))
          return failure();
        Value const1 = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 1));
        // Short circuit to binary add.
        if (numOperands == 2) {
          rewriter.replaceOpWithNewOp<Torch::AtenAddTensorOp>(
              binder.op, resultType, valList[0], valList[1], const1);
          return success();
        }
        // When binder.op->getNumOperands() > 2:
        Value curr = rewriter.create<Torch::AtenAddTensorOp>(
            binder.getLoc(), resultType, valList[0], valList[1], const1);
        for (int i = 2; i < numOperands; i++) {
          if (i == numOperands - 1) {
            curr = rewriter.create<Torch::AtenAddTensorOp>(
                binder.getLoc(), resultType, curr, valList[i], const1);
          } else {
            SmallVector<int64_t> resultBroadcastShapeInt;
            SmallVector<Value> resultBroadcastShapeValue;
            Torch::computeBroadcastShape(rewriter, binder.getLoc(), curr,
                                         valList[i], resultBroadcastShapeInt,
                                         resultBroadcastShapeValue);
            auto baseType = Torch::ValueTensorType::get(
                binder.op->getContext(), resultBroadcastShapeInt,
                resultType.getOptionalDtype());
            curr = rewriter.create<Torch::AtenAddTensorOp>(
                binder.getLoc(), baseType, curr, valList[i], const1);
          }
        }
        rewriter.replaceOp(binder.op, curr);
        return success();
      });
  patterns.onOp("Where", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  SmallVector<Value> valList;
                  int64_t numOperands = binder.op->getNumOperands();
                  if (binder.tensorOperands(valList, numOperands) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  Value condition = valList[0];
                  Value x = valList[1];
                  Value y = valList[2];
                  rewriter.replaceOpWithNewOp<Torch::AtenWhereSelfOp>(
                      binder.op, resultType, condition, x, y);
                  return success();
                });
  patterns.onOp(
      "Xor", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value x;
        Value y;
        if (binder.tensorOperands(x, y) || binder.tensorResultType(resultType))
          return failure();
        rewriter.replaceOpWithNewOp<Torch::AtenLogicalXorOp>(binder.op,
                                                             resultType, x, y);
        return success();
      });
  patterns.onOp(
      "Squeeze", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        SmallVector<Value> inputOperands;
        if (binder.tensorOperands(inputOperands, binder.op->getNumOperands()) ||
            binder.tensorResultType(resultType))
          return failure();

        Value data = inputOperands[0];
        auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
        if (!inputType.hasSizes() || !resultType.hasSizes())
          return rewriter.notifyMatchFailure(
              binder.op,
              "unimplemented: expected input and result to have shapes");

        int64_t inputRank = inputType.getSizes().size();
        int64_t resultRank = resultType.getSizes().size();
        int64_t rankDiff = inputRank - resultRank;
        if (rankDiff == 0) {
          // In this case, no dimension is squeezed. Hence just replace the op
          // with input.
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        if (inputOperands.size() == 1) {
          // Case: the `axes` value is not present, which means squeeze all
          // the dimensions with shape value 1.
          rewriter.replaceOpWithNewOp<Torch::AtenSqueezeOp>(binder.op,
                                                            resultType, data);
          return success();
        }

        SmallVector<Value> dimList;
        if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
          // If the input shape and result shape are statically known, then
          // the list of dims to be squeezed can be derived from those shapes.
          // As a result, we don't have to wait for the dim values to be known
          // at runtime, which is also expected by the downstream pipeline.
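          // e.g. input [1, 3, 1, 5] walked against result [3, 5] yields
          // squeezeDims = {0, 2}: every input dim not matched by the result
          // cursor is recorded as squeezed.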
          SmallVector<int64_t> inputShape(inputType.getSizes());
          SmallVector<int64_t> resultShape(resultType.getSizes());
          SmallVector<int64_t> squeezeDims;
          unsigned resultShapeCounter = 0;
          for (unsigned i = 0; i < inputRank; i++) {
            if (resultShapeCounter < resultRank &&
                inputShape[i] == resultShape[resultShapeCounter]) {
              resultShapeCounter++;
            } else {
              squeezeDims.push_back(i);
            }
          }
          for (auto i : squeezeDims) {
            dimList.push_back(rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getI64IntegerAttr(i)));
          }
        }

        if (dimList.empty()) {
          Value axes = inputOperands[1];
          Torch::BaseTensorType axesType =
              cast<Torch::BaseTensorType>(axes.getType());
          SmallVector<int64_t> selectSizes{1};
          Type selectResultType = axesType.getWithSizesAndDtype(
              selectSizes, axesType.getOptionalDtype());
          Value zero = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(),
              rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
          for (int i = 0; i < rankDiff; i++) {
            // Go through the axes list and get each dim in the list.
            Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
            Value extract = rewriter.create<Torch::AtenSelectIntOp>(
                binder.getLoc(), selectResultType, axes, zero, selectIndex);
            Value dim = rewriter.create<Torch::AtenItemOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
            dimList.push_back(dim);
          }
        }
        Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(),
            rewriter.getType<Torch::ListType>(
                rewriter.getType<Torch::IntType>()),
            dimList);
        rewriter.replaceOpWithNewOp<Torch::PrimsSqueezeOp>(
            binder.op, resultType, data, dimValueList);
        return success();
      });
  patterns.onOp(
      "Unsqueeze", 13,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // Unlike squeeze, where we are able to lower to Torch::PrimsSqueezeOp,
        // pytorch does not support torch.unsqueeze inserting multiple new
        // dims; the discussion can be found here:
        // https://github.com/pytorch/pytorch/issues/9410
        // So, for now, we unroll into multiple unsqueezes.
        Location loc = binder.getLoc();
        Torch::ValueTensorType resultType;
        Value data, axes;
        if (binder.tensorOperands(data, axes) ||
            binder.tensorResultType(resultType))
          return failure();
        auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
        if (!inputType.hasSizes() || !resultType.hasSizes())
          return rewriter.notifyMatchFailure(
              binder.op,
              "unimplemented: expected input and result to have shapes");

        int64_t inputRank = inputType.getSizes().size();
        int64_t resultRank = resultType.getSizes().size();
        int64_t rankDiff = resultRank - inputRank;
        if (rankDiff == 0) {
          // In this case, no dimension is unsqueezed. Hence just replace the
          // op with input.
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        SmallVector<int64_t> unsqueezeDims;
        SmallVector<int64_t> inputShape(inputType.getSizes());
        if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
          // If the input shape and result shape are statically known, then
          // the list of dims to be unsqueezed can be derived from those
          // shapes. As a result, we don't have to wait for the dim values to
          // be known at runtime, which is also expected by the downstream
          // pipeline.
          SmallVector<int64_t> resultShape(resultType.getSizes());
          unsigned inputShapeCounter = 0;
          for (unsigned i = 0; i < resultRank; i++) {
            if (inputShapeCounter < inputRank &&
                inputShape[inputShapeCounter] == resultShape[i]) {
              inputShapeCounter++;
            } else {
              unsqueezeDims.push_back(i);
            }
          }
        } else {
          SmallVector<int64_t> unsqueezeDimsInts;
          if (!matchPattern(axes, m_OnnxListOfConstantInts(unsqueezeDimsInts)))
            return rewriter.notifyMatchFailure(
                binder.op, "only support constant int axes values");

          for (auto dim : unsqueezeDimsInts)
            unsqueezeDims.push_back(dim < 0 ? dim + resultRank : dim);
          // If we don't sort, unsqueezing first on 4 and then on 0 would
          // fail for shape = {x,y,z} and axes [4,0].
          llvm::sort(unsqueezeDims.begin(), unsqueezeDims.end());
        }
        Value result = data;
        SmallVector<int64_t> unsqueezeShape = inputShape;
        for (auto dim : unsqueezeDims) {
          unsqueezeShape.insert(unsqueezeShape.begin() + dim, 1);
          Type unsqueezeType = resultType.getWithSizesAndDtype(
              unsqueezeShape, resultType.getOptionalDtype());
          Value cstDim = rewriter.create<Torch::ConstantIntOp>(
              loc, rewriter.getI64IntegerAttr(dim));
          result = rewriter.create<Torch::AtenUnsqueezeOp>(loc, unsqueezeType,
                                                           result, cstDim);
        }
        rewriter.replaceOp(binder.op, result);
        return success();
      });
  patterns.onOp(
      "Softmax", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value input;
        int64_t axis;
        if (binder.tensorOperand(input) ||
            binder.s64IntegerAttr(axis, "axis", -1) ||
            binder.tensorResultType(resultType))
          return failure();

        // ONNX allows negative axis.
        if (axis < 0)
          axis +=
              cast<Torch::ValueTensorType>(input.getType()).getSizes().size();

        Value constAxis = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), axis));

        Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());

        rewriter.replaceOpWithNewOp<Torch::AtenSoftmaxIntOp>(
            binder.op, resultType, input, constAxis, /*dtype=*/noneVal);
        return success();
      });

  patterns.onOp(
      "Selu", 6, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // y = gamma * (alpha * e^x - alpha) for x <= 0, y = gamma * x for x > 0
        Torch::ValueTensorType resultType;
        float alpha, gamma;
        Value operand;
        // Refer to https://onnx.ai/onnx/operators/onnx__Selu.html for the
        // default alpha and gamma values.
        if (binder.tensorOperand(operand) ||
            binder.f32FloatAttr(alpha, "alpha", 1.67326) ||
            binder.f32FloatAttr(gamma, "gamma", 1.0507) ||
            binder.tensorResultType(resultType))
          return failure();

        Value vAlpha = rewriter.create<Torch::ConstantFloatOp>(
            binder.getLoc(), rewriter.getType<Torch::FloatType>(),
            rewriter.getFloatAttr(rewriter.getF64Type(), alpha));

        Value vScale = rewriter.create<Torch::ConstantFloatOp>(
            binder.getLoc(), rewriter.getType<Torch::FloatType>(),
            rewriter.getFloatAttr(rewriter.getF64Type(), gamma));

        Value vInputScale = rewriter.create<Torch::ConstantFloatOp>(
            binder.getLoc(), rewriter.getType<Torch::FloatType>(),
            rewriter.getFloatAttr(rewriter.getF64Type(), 1.0));

        rewriter.replaceOpWithNewOp<Torch::AtenEluOp>(
            binder.op, resultType, operand, vAlpha, vScale, vInputScale);
        return success();
      });
  patterns.onOp("ReduceL1", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  int64_t keepDims, noop_with_empty_axes;
                  Value operand;
                  if (binder.tensorOperandAtIndex(operand, 0) ||
                      binder.tensorResultType(resultType) ||
                      binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
                      binder.s64IntegerAttr(noop_with_empty_axes,
                                            "noop_with_empty_axes", 0))
                    return failure();

                  Value data = rewriter.create<Torch::AtenAbsOp>(
                      binder.getLoc(), operand.getType(), operand);

                  return reducedSumImpl(binder, rewriter, data, resultType,
                                        /*storeValue=*/operand, keepDims,
                                        noop_with_empty_axes, false);
                });
  patterns.onOp(
      "ReduceL2", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value operand;
        int64_t keepDims, noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(operand, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        // A ReduceL2 op is equivalent to the following sequence of operations:
        // Mul(x, x) -> ReduceSum -> CastF32 -> Sqrt -> CastLike(resultType)
        Value squareOfOperand = rewriter.create<Torch::AtenMulTensorOp>(
            binder.getLoc(), operand.getType(), operand,
            operand);

        auto reducedSum =
            reducedSumImpl(binder, rewriter, squareOfOperand, resultType,
                           operand, keepDims, noop_with_empty_axes, true);
        if (failed(reducedSum))
          return rewriter.notifyMatchFailure(
              binder.op,
              "Failed to perform sum operation on square of operand");

        Value castDType = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(/*Float32Type*/ 6));

        Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value constFalse =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);

        // Perform an AtenToDtype op on the squared sum of the operand, stored
        // now in operand itself.
        auto size = dyn_cast<Torch::ValueTensorType>(operand.getType())
                        .getOptionalSizes();
        auto f32ResultType = rewriter.getType<Torch::ValueTensorType>(
            size, rewriter.getF32Type());
        Value operandCast = rewriter.create<Torch::AtenToDtypeOp>(
            binder.getLoc(), f32ResultType, operand, castDType,
            /*non_blocking=*/constFalse, /*copy=*/constFalse,
            /*memory_format=*/noneVal);

        Value operandSqrt = rewriter.create<Torch::AtenSqrtOp>(
            binder.getLoc(), f32ResultType, operandCast);

        Value resultDtype = Torch::getDtypeIntValueForType(
            rewriter, binder.getLoc(), resultType.getDtype());
        rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
            binder.op, resultType, operandSqrt, resultDtype,
            /*non_blocking=*/constFalse, /*copy=*/constFalse,
            /*memory_format=*/noneVal);
        return success();
      });
  patterns.onOp("ReduceLogSum", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value data;
                  int64_t keepDims, noop_with_empty_axes;
                  if (binder.tensorOperandAtIndex(data, 0) ||
                      binder.tensorResultType(resultType) ||
                      binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
                      binder.s64IntegerAttr(noop_with_empty_axes,
                                            "noop_with_empty_axes", 0))
                    return failure();

                  auto reducedSumBool =
                      reducedSumImpl(binder, rewriter, data, resultType,
                                     /*storeValue=*/data, keepDims,
                                     noop_with_empty_axes, true);

                  if (failed(reducedSumBool))
                    return rewriter.notifyMatchFailure(
                        binder.op, "Failed to perform sum operation on operand");

                  rewriter.replaceOpWithNewOp<Torch::AtenLogOp>(
                      binder.op, resultType, data);
                  return success();
                });
  patterns.onOp(
      "ReduceLogSumExp", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value data;
        int64_t keepDims, noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        // out = Log(reducesum(exp(data)))
        Value castDType = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(/*Float64Type*/ 7));

        Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value constFalse =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);

        auto size =
            dyn_cast<Torch::ValueTensorType>(data.getType()).getOptionalSizes();
        auto f64ResultType = rewriter.getType<Torch::ValueTensorType>(
            size, rewriter.getF64Type());
        Value dataCast = rewriter.create<Torch::AtenToDtypeOp>(
            binder.getLoc(), f64ResultType, data, castDType,
            /*non_blocking=*/constFalse, /*copy=*/constFalse,
            /*memory_format=*/noneVal);

        Value dataExp = rewriter.create<Torch::AtenExpOp>(
            binder.getLoc(), f64ResultType, dataCast);

        auto f64ReduceType = rewriter.getType<Torch::ValueTensorType>(
            resultType.getOptionalSizes(), rewriter.getF64Type());

        auto reducedSumBool = reducedSumImpl(
            binder, rewriter, dataExp, f64ReduceType,
            /*storeValue=*/data, keepDims, noop_with_empty_axes, true);

        if (failed(reducedSumBool))
          return rewriter.notifyMatchFailure(
              binder.op, "Failed to perform sum operation on operand");

        Value finalResult = rewriter.create<Torch::AtenLogOp>(
            binder.getLoc(), f64ReduceType, data);

        Value resultDtype = Torch::getDtypeIntValueForType(
            rewriter, binder.getLoc(), resultType.getDtype());
        rewriter.replaceOpWithNewOp<Torch::AtenToDtypeOp>(
            binder.op, resultType, finalResult, resultDtype,
            /*non_blocking=*/constFalse, /*copy=*/constFalse,
            /*memory_format=*/noneVal);
        return success();
      });
  patterns.onOp("ReduceSum", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value data;
                  int64_t keepDims, noop_with_empty_axes;
                  if (binder.tensorOperandAtIndex(data, 0) ||
                      binder.tensorResultType(resultType) ||
                      binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
                      binder.s64IntegerAttr(noop_with_empty_axes,
                                            "noop_with_empty_axes", 0))
                    return failure();

                  return reducedSumImpl(binder, rewriter, data, resultType,
                                        /*storeValue=*/data, keepDims,
                                        noop_with_empty_axes, false);
                });
  patterns.onOp("ReduceSumSquare", 1,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value data;
                  int64_t keepDims, noop_with_empty_axes;
                  if (binder.tensorOperandAtIndex(data, 0) ||
                      binder.tensorResultType(resultType) ||
                      binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
                      binder.s64IntegerAttr(noop_with_empty_axes,
                                            "noop_with_empty_axes", 0))
                    return failure();

                  Value dataSquare = rewriter.create<Torch::AtenMulTensorOp>(
                      binder.getLoc(), data.getType(), data, data);

                  return reducedSumImpl(binder, rewriter, dataSquare,
                                        resultType,
                                        /*storeValue=*/data, keepDims,
                                        noop_with_empty_axes, false);
                });
  patterns.onOp(
      "ReduceMean", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value data;
        int64_t keepDims, noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        SmallVector<Value> axesList;

        Value axesVal;
        if (!binder.tensorOperandAtIndex(axesVal, 1)) {
          auto inputType = dyn_cast<Torch::ValueTensorType>(data.getType());
          if (!inputType.hasSizes() || !resultType.hasSizes()) {
            return rewriter.notifyMatchFailure(
                binder.op,
                "unimplemented: expected input and result to have shapes");
          }

          // If the input shape and result shape are statically known, then
          // the list of dims to be reduced can be derived from those shapes.
          // As a result, we don't have to wait for the dim values to be known
          // at runtime, which is also expected by the downstream pipeline.
          if (inputType.areAllSizesKnown() && resultType.areAllSizesKnown()) {
            SmallVector<int64_t> inputShape{inputType.getSizes()};
            SmallVector<int64_t> resultShape{resultType.getSizes()};
            if (llvm::equal(inputShape, resultShape)) {
              // Case: none of the dimensions is reduced.
              rewriter.replaceOp(binder.op, data);
              return success();
            }
            if (areAllElementsDistinct(inputShape)) {
              // The check for the input shape elements to be distinct is
              // added for cases like:
              //   Input: [3, 2, 2] -> Output: [3, 2]
              // For the above case, the input and output shapes alone cannot
              // tell us whether dim 1 or dim 2 was reduced, so this
              // shape-based inference is only used when every input dim size
              // is distinct.
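              // This mirrors the shape walk in reducedSumImpl above: matched
              // dims advance the result cursor, mismatches are recorded as
              // reduced, and a size-1 result entry (keepdims=1) is consumed
              // immediately after.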
              SmallVector<int64_t> reduceDims;
              unsigned resultShapeCounter = 0;
              for (unsigned i = 0; i < inputShape.size(); i++) {
                if (resultShapeCounter < resultShape.size() &&
                    inputShape[i] == resultShape[resultShapeCounter]) {
                  resultShapeCounter++;
                } else {
                  reduceDims.push_back(i);
                  if (resultShapeCounter < resultShape.size() &&
                      resultShape[resultShapeCounter] == 1)
                    resultShapeCounter++;
                }
              }
              for (auto i : reduceDims) {
                axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
                    binder.getLoc(), rewriter.getI64IntegerAttr(i)));
              }
            }
          }

          if (axesList.empty()) {
            Torch::BaseTensorType axesType =
                cast<Torch::BaseTensorType>(axesVal.getType());
            auto axesTy = dyn_cast<Torch::ValueTensorType>(axesVal.getType());
            auto axesShape = axesTy.getSizes();
            if (axesShape.size() != 1 || axesShape[0] == Torch::kUnknownSize)
              return failure();

            Value zero = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getI64IntegerAttr(0));
            SmallVector<int64_t> selectSizes{1};
            auto selType = rewriter.getType<Torch::ValueTensorType>(
                selectSizes, axesType.getOptionalDtype());
            int64_t numAxes = axesShape[0];
            for (int64_t i = 0; i < numAxes; ++i) {
              Value iv = rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(), rewriter.getType<Torch::IntType>(),
                  rewriter.getI64IntegerAttr(i));
              Value extract = rewriter.create<Torch::AtenSelectIntOp>(
                  binder.getLoc(), selType, axesVal, zero, iv);
              Value dim = rewriter.create<Torch::AtenItemOp>(
                  binder.getLoc(), rewriter.getType<Torch::IntType>(),
                  extract);
              axesList.push_back(dim);
            }
          }
        }

        SmallVector<int64_t> axesInts;
        if (!binder.s64IntegerArrayAttr(axesInts, "axes", {})) {
          for (int64_t i = 0, s = axesInts.size(); i < s; ++i) {
            Value iv = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getI64IntegerAttr(axesInts[i]));
            axesList.push_back(iv);
          }
        }

        // Deal with the case when axes is empty.
        if (axesList.empty() && noop_with_empty_axes) {
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(),
            Torch::ListType::get(Torch::IntType::get(binder.op->getContext())),
            axesList);
        Value keepDimBool =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
        Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        rewriter.replaceOpWithNewOp<Torch::AtenMeanDimOp>(
            binder.op, resultType, data, dimValueList, keepDimBool,
            /*dtype=*/noneVal);
        return success();
      });
  patterns.onOp(
      "ReduceMax", 13,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // AtenAmaxOp allows us to pass a list of dims.
        Torch::ValueTensorType resultType;
        Value data;
        Value axes;
        int64_t keepDims;
        int64_t noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        auto dataTy = cast<Torch::ValueTensorType>(data.getType());
        Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();

        // If any of the input dims are 0 we set to the lower limit:
        if (llvm::any_of(dataTy.getSizes(), [](int64_t d) { return d == 0; }) &&
            (llvm::any_of(dataTy.getSizes(),
                          [](int64_t d) { return d == Torch::kUnknownSize; }) ||
             keepDims)) {
          auto dty = dataTy.getDtype();
          Value scalar;
          if (FloatType fpTy = dyn_cast<FloatType>(dty)) {
            auto inf =
                APFloat::getInf(fpTy.getFloatSemantics(), /*Negative=*/true);
            scalar = rewriter.create<Torch::ConstantFloatOp>(
                binder.getLoc(), rewriter.getType<Torch::FloatType>(),
                rewriter.getFloatAttr(rewriter.getF64Type(),
                                      inf.convertToDouble()));
          }

          if (IntegerType intTy = dyn_cast<IntegerType>(dty)) {
            auto minInt =
                intTy.isSigned()
                    ? APInt::getSignedMinValue(intTy.getIntOrFloatBitWidth())
                    : APInt::getMinValue(intTy.getIntOrFloatBitWidth());
            scalar = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy,
                rewriter.getIntegerAttr(rewriter.getIntegerType(64),
                                        minInt.getSExtValue()));
          }

          llvm::SmallVector<Value> fillDims;
          for (int i = 0, s = resultType.getSizes().size(); i < s; ++i) {
            auto staticDim = resultType.getSizes()[i];
            if (staticDim != Torch::kUnknownSize) {
              fillDims.push_back(rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(), torchIntTy,
                  rewriter.getI64IntegerAttr(staticDim)));
              continue;
            }

            Value iv = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy, rewriter.getI64IntegerAttr(i));
            fillDims.push_back(rewriter.create<Torch::AtenSizeIntOp>(
                binder.getLoc(), torchIntTy, data, iv));
          }

          Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
          Value fillDimsList = rewriter.create<Torch::PrimListConstructOp>(
              binder.getLoc(), Torch::ListType::get(torchIntTy), fillDims);
          rewriter.replaceOpWithNewOp<Torch::AtenFullOp>(
              binder.op, resultType, fillDimsList, scalar, none, none, none,
              none);
          return success();
        }

        // Previous versions of the operation had the axes as an attribute:
        SmallVector<Value> axesList;
        llvm::SmallVector<int64_t> axesAttr;
        if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
          for (int i = 0, s = axesAttr.size(); i < s; ++i) {
            axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy,
                rewriter.getI64IntegerAttr(axesAttr[i])));
          }
        }

        // Extract the axes values from the axes operand:
        if (!binder.tensorOperandAtIndex(axes, 1)) {
          Torch::BaseTensorType axesType =
              cast<Torch::BaseTensorType>(axes.getType());
          SmallVector<int64_t> selectSizes{1};
          Type selectResultType = axesType.getWithSizesAndDtype(
              selectSizes, axesType.getOptionalDtype());
          auto sizes = axesType.getSizes();

          Value zero = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(),
              rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));

          // Extract the value of each axis:
          for (int i = 0; i < sizes[0]; i++) {
            // Go through the axes list and get each dim in the list.
            Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
            Value extract = rewriter.create<Torch::AtenSelectIntOp>(
                binder.getLoc(), selectResultType, axes, zero, selectIndex);
            Value dim = rewriter.create<Torch::AtenItemOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
            axesList.push_back(dim);
          }
        }

        // Handle the noop case:
        if (axesList.empty() && noop_with_empty_axes) {
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        // Deal with the case when no axes arg is passed but it is not a noop:
        if (axesList.empty()) {
          int64_t numDims = dyn_cast<Torch::ValueTensorType>(data.getType())
                                .getSizes()
                                .size();
          for (int i = 0; i < numDims; i++) {
            Value curr = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
            axesList.push_back(curr);
          }
        }

        // Handle negative axes:
        Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
                                                          torchIntTy, data);
        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(0));
        for (Value &axes : axesList) {
          Value isNegative = rewriter.create<Torch::AtenLtIntOp>(
              binder.getLoc(), axes, zero);
          isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
                                                             isNegative);
          Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
              binder.getLoc(), isNegative, rankVal);
          axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
                                                      finalOffset);
        }

        Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(), Torch::ListType::get(torchIntTy), axesList);
        Value keepDimBool =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
        rewriter.replaceOpWithNewOp<Torch::AtenAmaxOp>(
            binder.op, resultType, data, dimValueList, keepDimBool);
        return success();
      });

  patterns.onOp(
      "ReduceMin", 13,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // AtenAminOp allows us to pass a list of dims.
        Torch::ValueTensorType resultType;
        Value data;
        Value axes;
        int64_t keepDims;
        int64_t noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        auto dataTy = cast<Torch::ValueTensorType>(data.getType());
        Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();

        // If any of the input dims are 0 we set to the upper limit:
        if (llvm::any_of(dataTy.getSizes(), [](int64_t d) { return d == 0; }) &&
            (llvm::any_of(dataTy.getSizes(),
                          [](int64_t d) { return d == Torch::kUnknownSize; }) ||
             keepDims)) {
          auto dty = dataTy.getDtype();
          Value scalar;
          if (FloatType fpTy = dyn_cast<FloatType>(dty)) {
            auto inf = APFloat::getInf(fpTy.getFloatSemantics());
            scalar = rewriter.create<Torch::ConstantFloatOp>(
                binder.getLoc(), rewriter.getType<Torch::FloatType>(),
                rewriter.getFloatAttr(rewriter.getF64Type(),
                                      inf.convertToDouble()));
          }

          if (IntegerType intTy = dyn_cast<IntegerType>(dty)) {
            auto mx =
                intTy.isSigned()
                    ? APInt::getSignedMaxValue(intTy.getIntOrFloatBitWidth())
                    : APInt::getMaxValue(intTy.getIntOrFloatBitWidth());
            scalar = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy,
                rewriter.getIntegerAttr(rewriter.getIntegerType(64),
                                        mx.getSExtValue()));
          }

          llvm::SmallVector<Value> fillDims;
          for (int i = 0, s = resultType.getSizes().size(); i < s; ++i) {
            auto staticDim = resultType.getSizes()[i];
            if (staticDim != Torch::kUnknownSize) {
              fillDims.push_back(rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(), torchIntTy,
                  rewriter.getI64IntegerAttr(staticDim)));
              continue;
            }

            Value iv = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy, rewriter.getI64IntegerAttr(i));
            fillDims.push_back(rewriter.create<Torch::AtenSizeIntOp>(
                binder.getLoc(), torchIntTy, data, iv));
          }

          Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
          Value fillDimsList = rewriter.create<Torch::PrimListConstructOp>(
              binder.getLoc(), Torch::ListType::get(torchIntTy), fillDims);
          rewriter.replaceOpWithNewOp<Torch::AtenFullOp>(
              binder.op, resultType, fillDimsList, scalar, none, none, none,
              none);
          return success();
        }

        // Previous versions of the operation had the axes as an attribute:
        SmallVector<Value> axesList;
        llvm::SmallVector<int64_t> axesAttr;
        if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
          for (int i = 0, s = axesAttr.size(); i < s; ++i) {
            axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy,
                rewriter.getI64IntegerAttr(axesAttr[i])));
          }
        }

        // Extract the axes values from the axes operand:
        if (!binder.tensorOperandAtIndex(axes, 1)) {
          Torch::BaseTensorType axesType =
              cast<Torch::BaseTensorType>(axes.getType());
          SmallVector<int64_t> selectSizes{1};
          Type selectResultType = axesType.getWithSizesAndDtype(
              selectSizes, axesType.getOptionalDtype());
          auto sizes = axesType.getSizes();

          Value zero = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(),
              rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));

          // Extract the value of each axis:
          for (int i = 0; i < sizes[0]; i++) {
            // Go through the axes list and get each dim in the list.
            Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
            Value extract = rewriter.create<Torch::AtenSelectIntOp>(
                binder.getLoc(), selectResultType, axes, zero, selectIndex);
            Value dim = rewriter.create<Torch::AtenItemOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(), extract);
            axesList.push_back(dim);
          }
        }

        // Handle the noop case:
        if (axesList.empty() && noop_with_empty_axes) {
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        // Deal with the case when no axes arg is passed but it is not a noop:
        if (axesList.empty()) {
          int64_t numDims = dyn_cast<Torch::ValueTensorType>(data.getType())
                                .getSizes()
                                .size();
          for (int i = 0; i < numDims; i++) {
            Value curr = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(),
                rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
            axesList.push_back(curr);
          }
        }

        // Handle negative axes:
        Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
                                                          torchIntTy, data);
        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(0));
        for (Value &axes : axesList) {
          Value isNegative = rewriter.create<Torch::AtenLtIntOp>(
              binder.getLoc(), axes, zero);
          isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
                                                             isNegative);
          Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
              binder.getLoc(), isNegative, rankVal);
          axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
                                                      finalOffset);
        }

        Value dimValueList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(), Torch::ListType::get(torchIntTy), axesList);
        Value keepDimBool =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), keepDims);
        rewriter.replaceOpWithNewOp<Torch::AtenAminOp>(
            binder.op, resultType, data, dimValueList, keepDimBool);
        return success();
      });

  patterns.onOp(
      "Shape", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value operand;
        int64_t start, end;
        if (binder.tensorOperand(operand) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(start, "start", 0) ||
            binder.s64IntegerAttr(end, "end", -1))
          return failure();

        auto inputType = dyn_cast<Torch::ValueTensorType>(operand.getType());
        int64_t inputRank = inputType.getSizes().size();

        auto shapeType = Torch::ValueTensorType::get(
            binder.op->getContext(), SmallVector<int64_t>{inputRank},
            resultType.getOptionalDtype());

        Value shape = rewriter.create<Torch::Aten_ShapeAsTensorOp>(
            binder.getLoc(), shapeType, operand);

        if (start == 0 && end == -1) {
          rewriter.replaceOp(binder.op, shape);
          return success();
        }

        Value sv = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(start));

        Value ev = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(end));

        Value step = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(1));

        Value dim = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(0));

        shape = rewriter.create<Torch::AtenSliceTensorOp>(
            binder.getLoc(), resultType, shape, dim, sv, ev, step);

        rewriter.replaceOp(binder.op, shape);
        return success();
      });

  patterns.onOp("Sinh", 9,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();

                  rewriter.replaceOpWithNewOp<Torch::AtenSinhOp>(
                      binder.op, resultType, operand);
                  return success();
                });

  // split with fixed-size parts
  // Arguments:
  // - input: the tensor to split
  // Attributes:
  // - axis: the axis along which to split the input
  // - num_outputs: the number of outputs to produce
  // Outputs:
  // - outputs: the produced outputs. Variadic with num_outputs elements.
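  // The lowering below computes splitSize = ceil(dimSize / num_outputs) as
  // (dimSize + num_outputs - 1) / num_outputs and takes splitSize-wide
  // slices, with the final slice absorbing the remainder; e.g. dimSize 10
  // with num_outputs 3 produces slices of sizes 4, 4, and 2.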
  // Note: torch.aten gives a list of tensors, but ONNX gives a variadic list
  // of tensors, so we need to unpack the list.
  patterns.onOp(
      "Split", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Value self;
        int64_t axis;
        int64_t numOutputs;
        if (binder.tensorOperand(self))
          return rewriter.notifyMatchFailure(
              binder.op, "Not converting to AtenSplitTensorOp due to input "
                         "tensor mismatch");
        if (binder.s64IntegerAttr(axis, "axis", 0))
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed to get axis attribute");
        numOutputs = binder.op->getNumResults();
        if (binder.s64IntegerAttr(numOutputs, "num_outputs", numOutputs))
          return rewriter.notifyMatchFailure(
              binder.op, "Failed to get num_outputs attribute");

        auto loc = binder.getLoc();
        auto result0Ty =
            cast<Torch::ValueTensorType>(binder.op->getResult(0).getType());
        auto resultNTy = cast<Torch::ValueTensorType>(
            binder.op->getResults().back().getType());
        auto selfTy = cast<Torch::ValueTensorType>(self.getType());

        int64_t dim = axis;
        if (dim < 0)
          dim += selfTy.getSizes().size();

        Value dimValue = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(dim));

        Value vNumOutputs = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(numOutputs));

        Value one = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(1));
        Value zero = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(0));

        Value vDimSize = rewriter.create<Torch::AtenSizeIntOp>(
            loc, rewriter.getType<Torch::IntType>(), self, dimValue);

        Value addNumOutputs =
            rewriter.create<Torch::AtenAddIntOp>(loc, vDimSize, vNumOutputs);
        Value subOne =
            rewriter.create<Torch::AtenSubIntOp>(loc, addNumOutputs, one);
        Value splitSize =
            rewriter.create<Torch::AtenFloordivIntOp>(loc, subOne, vNumOutputs);

        llvm::SmallVector<Value> outputs;
        Value step = one;
        Value start = zero;

        for (int i = 0; i < numOutputs - 1; ++i) {
          Value end =
              rewriter.create<Torch::AtenAddIntOp>(loc, start, splitSize);
          Value slice = rewriter.create<Torch::AtenSliceTensorOp>(
              loc, result0Ty, self, dimValue, start, end, step);
          start = end;
          outputs.push_back(slice);
        }

        Value end = vDimSize;
        Value lastSlice = rewriter.create<Torch::AtenSliceTensorOp>(
            loc, resultNTy, self, dimValue, start, end, step);
        outputs.push_back(lastSlice);

        rewriter.replaceOp(binder.op, outputs);

        return success();
      });

  // split with variable parts
  // Arguments:
  // - input: the tensor to split
  // - split: the sizes of the splits to be produced
  // Attributes:
  // - axis: the axis along which to split the input
  // - num_outputs: the number of outputs to produce
  // Outputs:
  // - outputs: the produced outputs. Variadic with num_outputs elements.
  // Note: torch.aten gives a list of tensors, but ONNX gives a variadic list
  // of tensors, so we need to unpack the list.
  patterns.onOp(
      "Split", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Value self;
        Value split;
        int64_t axis;
        int64_t num_outputs;
        if (binder.tensorOperandAtIndex(self, 0) ||
            binder.tensorOperandAtIndex(split, 1))
          return rewriter.notifyMatchFailure(
              binder.op, "Not converting to AtenSplitWithSizesOp due to input "
                         "tensor mismatch");
        if (binder.s64IntegerAttr(axis, "axis", 0))
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed to get axis attribute");
        if (binder.s64IntegerAttr(num_outputs, "num_outputs", 0))
          return rewriter.notifyMatchFailure(
              binder.op, "Failed to get num_outputs attribute");

        auto result0Ty =
            cast<Torch::ValueTensorType>(binder.op->getResult(0).getType());
        auto selfTy =
            cast<Torch::ValueTensorType>(binder.op->getOperand(0).getType());

        int64_t dim = axis;
        if (dim < 0)
          dim += selfTy.getSizes().size();

        llvm::SmallVector<int64_t> intermediateShape(result0Ty.getSizes());
        for (auto result : binder.op->getResultTypes()) {
          int64_t d = cast<Torch::ValueTensorType>(result).getSizes()[dim];
          intermediateShape[dim] = d == intermediateShape[dim] ? d : -1;
        }

        Torch::PrimTolistOp splitToList = rewriter.create<Torch::PrimTolistOp>(
            binder.getLoc(),
            Torch::ListType::get(rewriter.getType<Torch::IntType>()), split);

        Value dimValue = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), dim));

        // TODO: Attempting to use the shape expected by the ONNX mlir as
        // ground truth. For now just use dynamic shapes.
        auto resultOuterType =
            Torch::ListType::get(rewriter.getType<Torch::ValueTensorType>(
                /*std::optional<llvm::ArrayRef<int64_t>>=*/intermediateShape,
                result0Ty.getOptionalDtype()));
        Torch::AtenSplitWithSizesOp new_op =
            rewriter.create<Torch::AtenSplitWithSizesOp>(
                binder.getLoc(), resultOuterType, self,
                splitToList.getResult(0), dimValue);

        // The onnx op is variadic with multiple results, but
        // AtenSplitWithSizes outputs a list, so we need to unpack the list.
        rewriter.replaceOpWithNewOp<Torch::PrimListUnpackOp>(
            binder.op, binder.op->getResults().getType(), new_op.getResult());

        return success();
      });
  patterns.onOp("Tan", 7,
                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                  Torch::ValueTensorType resultType;
                  Value operand;
                  if (binder.tensorOperand(operand) ||
                      binder.tensorResultType(resultType))
                    return failure();
                  rewriter.replaceOpWithNewOp<Torch::AtenTanOp>(
                      binder.op, resultType, operand);
                  return success();
                });
  patterns.onOp(
      "Transpose", 1,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        auto loc = binder.getLoc();
        Torch::ValueTensorType resultType;
        Value operand;
        if (binder.tensorOperand(operand) ||
            binder.tensorResultType(resultType))
          return failure();
        auto operandType = cast<Torch::ValueTensorType>(operand.getType());
        TensorType tensorType = operandType.toBuiltinTensor();
        if (!tensorType || !tensorType.hasRank())
          return failure();

        // Default permutation is to reverse orders:
        int64_t rank = tensorType.getRank();
        llvm::SmallVector<int64_t> reverse(rank);
        for (int64_t i = 0; i < rank; ++i) {
          reverse[i] = rank - i - 1;
        }

        llvm::SmallVector<int64_t> permutations;
        if (failed(binder.s64IntegerArrayAttr(permutations, "perm", reverse)))
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed to obtain permutations");

        if (static_cast<int64_t>(permutations.size()) != rank)
          return rewriter.notifyMatchFailure(
              binder.op, "Permutation length does not match operand rank");

        llvm::SmallVector<int64_t> shape(tensorType.getShape());
        llvm::SmallVector<int64_t> current(rank);
        for (int64_t i = 0; i < rank; ++i) {
          current[i] = i;
        }

        for (auto &dim : permutations)
dim + rank : dim; // We need to override to the destination if known: if (resultType.hasSizes()) { for (int i = 0; i < rank; ++i) { shape[permutations[i]] = resultType.getSizes()[i]; } } // Convert dynamic shape dimension: for (unsigned i = 0; i < shape.size(); i++) { if (shape[i] == ShapedType::kDynamic) shape[i] = Torch::kUnknownSize; } for (int64_t i = 0; i < rank; ++i) { if (current[i] == permutations[i]) continue; int64_t target = i + 1; for (; target < rank; ++target) { if (current[target] == permutations[i]) break; } std::swap(shape[i], shape[target]); std::swap(current[i], current[target]); Value dim0 = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), i)); Value dim1 = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), target)); operand = rewriter.create( loc, Torch::ValueTensorType::get(tensorType.getContext(), shape, operandType.getOptionalDtype()), operand, dim0, dim1); } rewriter.replaceOp(binder.op, operand); return success(); }); patterns.onOp( "Slice", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultTorchType; Value operand, starts, ends; // Handle if axes are not provided if (binder.tensorOperandAtIndex(operand, 0) || binder.tensorOperandAtIndex(starts, 1) || binder.tensorOperandAtIndex(ends, 2) || binder.tensorResultType(resultTorchType)) { return failure(); } auto context = rewriter.getContext(); auto operandTorchTy = cast(operand.getType()); auto operandTy = dyn_cast(operandTorchTy.toBuiltinTensor()); if (!operandTy) return rewriter.notifyMatchFailure( binder.op, "Expected tensor operator argument to be a ranked tensor type"); auto startsTorchTy = cast(starts.getType()); auto startsTy = dyn_cast(startsTorchTy.toBuiltinTensor()); int startSize = startsTy.getDimSize(0); auto endsTorchTy = cast(ends.getType()); auto endsTy = dyn_cast(endsTorchTy.toBuiltinTensor()); int endSize = endsTy.getDimSize(0); auto resultTy = dyn_cast(resultTorchType.toBuiltinTensor()); if (!resultTy) return rewriter.notifyMatchFailure( binder.op, "Expected result type to be a ranked tensor type"); Location loc = binder.getLoc(); // Binding `axes` from its arguments or through a default value Value axes; if (binder.getNumOperands() >= 4) { if (binder.tensorOperandAtIndex(axes, 3)) { return failure(); } } // Binding `steps` from its arguments or through a default value Value steps; if (binder.getNumOperands() >= 5) { if (binder.tensorOperandAtIndex(steps, 4)) { return failure(); } } else { // The default `steps` value is a 1d tensor filled with ones with a // size equal to the size of `starts` and `ends`. 
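          // Illustrative sketch (assumed shapes, not from the spec): if
          // `starts` and `ends` are vtensor<[3],si64>, the ops below
          // materialize steps = ones([3]), i.e. the ONNX default of
          // advancing every sliced axis one element at a time.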
          Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
          Value sizeStepInput = rewriter.create<Torch::ConstantIntOp>(
              loc, rewriter.getType<Torch::IntType>(),
              rewriter.getIntegerAttr(rewriter.getIntegerType(64), startSize));
          Value sizeStepsInput = rewriter.create<Torch::PrimListConstructOp>(
              loc,
              Torch::ListType::get(
                  Torch::IntType::get(binder.op->getContext())),
              sizeStepInput);
          steps = rewriter.create<Torch::AtenOnesOp>(
              loc, startsTorchTy, sizeStepsInput, none, none, none, none);
        }

        if (!(endsTy.getRank() == 1 && startsTy.getRank() == 1 &&
              startSize == endSize))
          return rewriter.notifyMatchFailure(
              binder.op, "Expected the rank of starts and ends tensors to be 1 "
                         "and their dimensions to match");

        if (axes) {
          auto axesTorchTy = cast<Torch::ValueTensorType>(axes.getType());
          auto axesTy =
              dyn_cast<RankedTensorType>(axesTorchTy.toBuiltinTensor());
          // Check axesTy before querying its dim size; dyn_cast may fail.
          if (!axesTy || axesTy.getDimSize(0) != endSize)
            return rewriter.notifyMatchFailure(
                binder.op, "Axes should be the same size as starts and ends");
        }

        auto stepsTy = dyn_cast<RankedTensorType>(
            cast<Torch::ValueTensorType>(steps.getType()).toBuiltinTensor());

        if (!(stepsTy && stepsTy.getDimSize(0) == endsTy.getDimSize(0)))
          return rewriter.notifyMatchFailure(
              binder.op, "Steps should be the same size as starts and ends");

        Value zero = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));

        auto select = [&](Value v, Value k) -> Value {
          auto ty = cast<Torch::ValueTensorType>(v.getType());
          auto sel = rewriter.create<Torch::AtenIndexSelectOp>(
              loc,
              Torch::ValueTensorType::get(ty.getContext(),
                                          ArrayRef<int64_t>{1},
                                          ty.getOptionalDtype()),
              v, zero, k);
          Value item = rewriter.create<Torch::AtenItemOp>(
              loc, rewriter.getType<Torch::IntType>(), sel);
          return item;
        };

        llvm::SmallVector<int64_t> intermediateShape(operandTy.getShape());
        for (int i = 0, s = operandTy.getRank(); i < s; ++i) {
          if (operandTy.getDimSize(i) != resultTy.getDimSize(i))
            intermediateShape[i] = -1;
          if (intermediateShape[i] == ShapedType::kDynamic)
            intermediateShape[i] = Torch::kUnknownSize;
        }
        auto intermediateType = Torch::ValueTensorType::get(
            context, intermediateShape, resultTorchType.getOptionalDtype());
        for (int i = 0; i < endSize; ++i) {
          Value k = rewriter.create<Torch::ConstantIntOp>(
              loc, rewriter.getType<Torch::IntType>(),
              rewriter.getIntegerAttr(rewriter.getIntegerType(64), i));
          Value kTensor = rewriter.create<Torch::PrimNumToTensorScalarOp>(
              loc,
              Torch::ValueTensorType::get(
                  context, ArrayRef<int64_t>{1},
                  rewriter.getIntegerType(64, /*signed*/ 1)),
              k);

          Value start = select(starts, kTensor);
          Value end = select(ends, kTensor);
          Value axis = axes ? select(axes, kTensor) : k;
          Value step = select(steps, kTensor);

          auto sliceType = intermediateType;
          sliceType = i == (endSize - 1) ? resultTorchType : sliceType;
          operand = rewriter.create<Torch::AtenSliceTensorOp>(
              loc, sliceType, operand, axis, start, end, step);
        }

        rewriter.replaceOp(binder.op, operand);
        return success();
      });
  patterns.onOp(
      "Reshape", 5, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value data;
        Value shape;
        int64_t allowzero;
        if (binder.tensorOperands(data, shape) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(allowzero, "allowzero", 0))
          return failure();

        // If the result shape is static then we can create a result shape
        // list directly using the result shape values (integers).
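        // Worked example (illustrative, assumed shapes): for
        // Reshape(data : [2,3,4], shape = [6,4]) with a fully static result
        // type, this fast path emits
        //   %c6 = torch.constant.int 6
        //   %c4 = torch.constant.int 4
        //   %l  = torch.prim.ListConstruct %c6, %c4
        //   torch.aten.reshape %data, %l
        // and never has to inspect the runtime `shape` operand.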
if (resultType.hasSizes()) { bool hasStaticShape = resultType.areAllSizesKnown(); ArrayRef resultShapeInt = resultType.getSizes(); if (hasStaticShape) { SmallVector resultShape; for (int64_t dim : resultShapeInt) { resultShape.push_back(rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dim))); } Value resultShapeList = rewriter.create( binder.getLoc(), Torch::ListType::get( Torch::IntType::get(binder.op->getContext())), resultShape); rewriter.replaceOpWithNewOp( binder.op, resultType, data, resultShapeList); return success(); } } Torch::BaseTensorType shapeType = cast(shape.getType()); SmallVector dimList; SmallVector selectSizes; selectSizes.push_back(1); Type selectResultType = shapeType.getWithSizesAndDtype( llvm::ArrayRef(selectSizes), shapeType.getOptionalDtype()); auto shapeSizes = dyn_cast(shape.getType()).getSizes(); auto dataSizes = dyn_cast(data.getType()).getSizes(); Value zero = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0)); if (allowzero == 0) { // convert shape (tensor) into torch int list while dealing with zero // vals for (int i = 0; i < shapeSizes[0]; i++) { // Go through the shape list and get each dim in the list Value selectIndex = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), i)); Value extract = rewriter.create( binder.getLoc(), selectResultType, shape, zero, selectIndex); Value dim = rewriter.create( binder.getLoc(), rewriter.getType(), extract); // deal with zero axis values: replace with original dim value in // input Value isZero = rewriter.create(binder.getLoc(), dim, zero); isZero = rewriter.create(binder.getLoc(), isZero); int64_t dataRank = dataSizes.size(); if (i < dataRank) { auto torchIntTy = rewriter.getType(); auto int64Ty = rewriter.getIntegerType(64, true); auto dimTy = rewriter.getType( ArrayRef(), int64Ty); auto boolTy = rewriter.getType( ArrayRef(), rewriter.getI1Type()); Value iv = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(i)); Value inDim = rewriter.create( binder.getLoc(), torchIntTy, data, iv); isZero = rewriter.create( binder.getLoc(), boolTy, isZero); inDim = rewriter.create( binder.getLoc(), dimTy, inDim); dim = rewriter.create( binder.getLoc(), dimTy, dim); Value finalDim = rewriter.create( binder.getLoc(), dimTy, isZero, inDim, dim); dim = rewriter.create( binder.getLoc(), rewriter.getType(), finalDim); } dimList.push_back(dim); } Value dimValueList = rewriter.create( binder.getLoc(), Torch::ListType::get( Torch::IntType::get(binder.op->getContext())), dimList); rewriter.replaceOpWithNewOp( binder.op, resultType, data, dimValueList); return success(); } // convert axes (tensor) into torch int list for (int i = 0; i < shapeSizes[0]; i++) { // Go through the axes list and get each dim in the list Value selectIndex = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), i)); Value extract = rewriter.create( binder.getLoc(), selectResultType, shape, zero, selectIndex); Value dim = rewriter.create( binder.getLoc(), rewriter.getType(), extract); dimList.push_back(dim); } Value dimValueList = rewriter.create( binder.getLoc(), Torch::ListType::get(Torch::IntType::get(binder.op->getContext())), dimList); rewriter.replaceOpWithNewOp(binder.op, resultType, data, dimValueList); return success(); }); patterns.onOp( "ReduceProd", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) { // ReduceProd allows us to pass a list of dims but 
AtenProdDimInt only
        // allows one dim as input.
        Torch::ValueTensorType resultType;
        Value data;
        Value axes;
        int64_t keepDims;
        int64_t noop_with_empty_axes;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorResultType(resultType) ||
            binder.s64IntegerAttr(keepDims, "keepdims", 1) ||
            binder.s64IntegerAttr(noop_with_empty_axes, "noop_with_empty_axes",
                                  0))
          return failure();

        auto dataTy = cast<Torch::BaseTensorType>(data.getType());
        Torch::IntType torchIntTy = rewriter.getType<Torch::IntType>();

        if (!resultType.hasSizes() || !resultType.areAllSizesKnown() ||
            !dataTy.areAllSizesKnown())
          return rewriter.notifyMatchFailure(
              binder.op,
              "Expected the input and result type to have known sizes");

        int64_t rank = dataTy.getSizes().size();
        SmallVector<Value> axesList;
        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(0));

        // Previous versions of the operation had the axes as an attribute:
        llvm::SmallVector<int64_t> axesAttr;
        if (!binder.s64IntegerArrayAttr(axesAttr, "axes", {})) {
          for (int i = 0, s = axesAttr.size(); i < s; ++i) {
            axesList.push_back(rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), torchIntTy,
                rewriter.getI64IntegerAttr(axesAttr[i])));
          }
        }

        // Handle cases where the axes are explicitly specified:
        // Extract the axes values from the axes operand.
        // This really shouldn't happen but it helps pass weird tests.
        // TODO: Derive the chosen axes from the data type and final result
        // type instead of using the dynamic axes at operand[1].
        if (!binder.tensorOperandAtIndex(axes, 1)) {
          Torch::BaseTensorType axesType =
              cast<Torch::BaseTensorType>(axes.getType());
          auto sizes = axesType.getSizes();
          for (int i = 0; i < sizes[0]; i++) {
            Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getI64IntegerAttr(i));
            Value extract = rewriter.create<Torch::AtenSelectIntOp>(
                binder.getLoc(),
                axesType.getWithSizesAndDtype(llvm::SmallVector<int64_t>{1},
                                              axesType.getOptionalDtype()),
                axes, zero, selectIndex);
            Value dim = rewriter.create<Torch::AtenItemOp>(
                binder.getLoc(), torchIntTy, extract);
            axesList.push_back(dim);
          }
        }

        // Handle the noop case:
        // When axes is empty and noop_with_empty_axes is set to true, the
        // input tensor is not reduced, and the output tensor is equivalent
        // to the input tensor.
        if (axesList.empty() && noop_with_empty_axes) {
          rewriter.replaceOp(binder.op, data);
          return success();
        }

        // Handle the case when no axes arg is passed but it is not a noop:
        // Manually set positive axis to all dims.
        if (axesList.empty()) {
          for (int i = 0; i < rank; i++) {
            Value dimValue = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getI64IntegerAttr(i));
            axesList.push_back(dimValue);
          }
        }

        // Handle negative axes:
        Value rankVal = rewriter.create<Torch::AtenDimOp>(binder.getLoc(),
                                                          torchIntTy, data);
        for (Value &axes : axesList) {
          Value isNegative = rewriter.create<Torch::AtenLtIntOp>(
              binder.getLoc(), axes, zero);
          isNegative = rewriter.create<Torch::AtenIntBoolOp>(binder.getLoc(),
                                                             isNegative);
          Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
              binder.getLoc(), isNegative, rankVal);
          axes = rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), axes,
                                                      finalOffset);
        }

        // Handle the multiple axes case:
        // ReduceProd on each dim, always setting keepdims == true in the
        // intermediate steps to avoid a segfault.
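        // Illustrative shape walk-through (assumed example): with
        // data : [2,3,4,5], axes = [1,2], keepdims = 0, the loop below
        // reduces one axis at a time with keepdim = true,
        // [2,3,4,5] -> [2,1,4,5] -> [2,1,1,5], and the final reshape
        // collapses the kept unit dims to the declared result shape [2,5].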
        Value trueVal =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
        Value noneVal =
            rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        SmallVector<int64_t> intermediateShape(rank, Torch::kUnknownSize);
        Value dataReduceProd = data;
        for (int i = 0, numAxes = axesList.size(); i < numAxes; i++) {
          auto axis = axesList[i];
          if (keepDims && i == numAxes - 1) {
            dataReduceProd = rewriter.create<Torch::AtenProdDimIntOp>(
                binder.getLoc(),
                dataTy.getWithSizesAndDtype(resultType.getSizes(),
                                            dataTy.getOptionalDtype()),
                dataReduceProd, axis, trueVal, noneVal);
            rewriter.replaceOp(binder.op, dataReduceProd);
            return success();
          }
          Type resultTyReduceProd = dataTy.getWithSizesAndDtype(
              ArrayRef(intermediateShape), dataTy.getOptionalDtype());
          dataReduceProd = rewriter.create<Torch::AtenProdDimIntOp>(
              binder.getLoc(), resultTyReduceProd, dataReduceProd, axis,
              trueVal, noneVal);
        }

        // Derive the final shape of the tensor after the prod loop over each
        // axis.
        SmallVector<int64_t> dataReduceProdSize;
        auto dataSize = dataTy.getSizes();
        auto resultTypeSizes = resultType.getSizes();
        if (!keepDims) {
          // Handle the keepDims == false case:
          // 2-pointer algorithm to derive the static shape after the prod
          // loop, where i walks the data dims and j walks the result dims.
          int j = 0;
          for (int i = 0; i < rank; i++) {
            if (resultTypeSizes.size() && dataSize[i] == resultTypeSizes[j]) {
              dataReduceProdSize.push_back(resultTypeSizes[j]);
              j++;
              continue;
            }
            dataReduceProdSize.push_back(1);
          }
        }

        // Reshape the prod loop result to the final result shape.
        SmallVector<Value> dataReduceProdShape;
        for (auto dim : dataReduceProdSize)
          dataReduceProdShape.push_back(rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(dim)));
        Value dataReduceProdShapeList =
            rewriter.create<Torch::PrimListConstructOp>(
                binder.getLoc(),
                rewriter.getType<Torch::ListType>(
                    rewriter.getType<Torch::IntType>()),
                dataReduceProdShape);
        rewriter.replaceOpWithNewOp<Torch::AtenReshapeOp>(
            binder.op, resultType, dataReduceProd, dataReduceProdShapeList);
        return success();
      });
  patterns.onOp(
      "Range", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // ONNX.Range(start, limit, delta) -- limit is exclusive.
        Torch::ValueTensorType resultType;
        Value start, limit, delta;
        auto loc = binder.getLoc();
        Value none = rewriter.create<Torch::ConstantNoneOp>(loc);
        if (binder.tensorOperandAtIndex(start, 0) ||
            binder.tensorOperandAtIndex(limit, 1) ||
            binder.tensorOperandAtIndex(delta, 2) ||
            binder.tensorResultType(resultType))
          return failure();

        // Convert a 0-dimensional (scalar) tensor to a scalar torch numeric
        // value; e.g. torch.tensor(1.1) is the ONNX equivalent of the scalar
        // 1.1. The dtype of start, limit, delta can be one of: double, float,
        // int16, int32, int64. We assume start, limit and delta all have the
        // same type (could they be different?).
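        // E.g. (illustrative): Range(start = tensor(0.0), limit = tensor(5.0),
        // delta = tensor(1.1)) lowers to torch.aten.arange.start_step with
        // the float scalars 0.0, 5.0, 1.1, producing [0.0, 1.1, 2.2, 3.3, 4.4].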
Torch::BaseTensorType startTensorType = cast(start.getType()); bool isFloatDType = startTensorType.getDtype().isF64() || startTensorType.getDtype().isF32(); bool isIntDType = startTensorType.getDtype().isInteger(16) || startTensorType.getDtype().isInteger(32) || startTensorType.getDtype().isInteger(64); if (!isFloatDType && !isIntDType) { return rewriter.notifyMatchFailure( binder.op, "Expected the start, limit, delta to be one of " "double, float, int16, int32, int64"); } Value scalarStart, scalarLimit, scalarDelta; if (isFloatDType) { scalarStart = getItemOp(binder, rewriter, start); scalarLimit = getItemOp(binder, rewriter, limit); scalarDelta = getItemOp(binder, rewriter, delta); } else { scalarStart = getItemOp(binder, rewriter, start); scalarLimit = getItemOp(binder, rewriter, limit); scalarDelta = getItemOp(binder, rewriter, delta); } rewriter.replaceOpWithNewOp( binder.op, resultType, scalarStart, scalarLimit, scalarDelta, none, none, none, none); return success(); }); patterns.onOp( "Size", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value operand; if (binder.tensorOperand(operand) || binder.tensorResultType(resultType)) return failure(); auto loc = binder.getLoc(); auto &op = binder.op; auto operandTy = cast(operand.getType()); if (!operandTy.hasSizes()) return rewriter.notifyMatchFailure(op, "input rank unknown"); llvm::SmallVector dims; int64_t rank = operandTy.getSizes().size(); for (int i = 0; i < rank; ++i) { auto iv = rewriter.create( loc, rewriter.getI64IntegerAttr(i)); Value dim = rewriter.create( loc, rewriter.getType(), operand, iv); dims.push_back(dim); } Value cstFalse = rewriter.create(loc, false); Value none = rewriter.create(loc); if (dims.empty()) { Value one = rewriter.create( loc, rewriter.getI64IntegerAttr(1)); rewriter.replaceOpWithNewOp( op, resultType, one, none, none, cstFalse); return success(); } Value prod = dims[0]; for (int i = 1, s = dims.size(); i < s; ++i) prod = rewriter.create(loc, prod, dims[i]); rewriter.replaceOpWithNewOp( op, resultType, prod, none, none, cstFalse); return success(); }); patterns.onOp( "Tile", 6, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value operand; Value repeatDims; if (binder.tensorOperands(operand, repeatDims) || binder.tensorResultType(resultType)) return failure(); // convert repeatDims tensor to list of ints auto repeatDimsSizes = dyn_cast(repeatDims.getType()).getSizes(); SmallVector dimList; SmallVector selectSizes; selectSizes.push_back(1); Torch::BaseTensorType shapeType = cast(repeatDims.getType()); Type selectResultType = shapeType.getWithSizesAndDtype( llvm::ArrayRef(selectSizes), shapeType.getOptionalDtype()); Value zero = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0)); for (int i = 0; i < repeatDimsSizes[0]; i++) { Value selectIndex = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), i)); Value extract = rewriter.create( binder.getLoc(), selectResultType, repeatDims, zero, selectIndex); Value dim = rewriter.create( binder.getLoc(), rewriter.getType(), extract); dimList.push_back(dim); } Value dimValueList = rewriter.create( binder.getLoc(), Torch::ListType::get(Torch::IntType::get(binder.op->getContext())), dimList); rewriter.replaceOpWithNewOp(binder.op, resultType, operand, dimValueList); return success(); }); patterns.onOp( "TopK", 11, [](OpBinder binder, ConversionPatternRewriter 
&rewriter) { Torch::ValueTensorType Values_type, Indices_type; Value input, kValue; int64_t axis; bool largest, sorted; if (binder.tensorOperandAtIndex(input, 0) || binder.tensorOperandAtIndex(kValue, 1) || binder.s64IntegerAttr(axis, "axis", -1) || binder.s64BoolAttr(largest, "largest", true) || binder.s64BoolAttr(sorted, "sorted", true) || binder.tensorResultTypeAtIndex(Values_type, 0) || binder.tensorResultTypeAtIndex(Indices_type, 1)) return failure(); std::optional maybeRank = Torch::getTensorRank(input); if (!maybeRank) return rewriter.notifyMatchFailure(binder.op, "Unimplemented: unranked tensor"); unsigned rank = *maybeRank; axis = Torch::toPositiveDim(axis, rank); Value cstAxis = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(axis)); Value cstLargest = rewriter.create(binder.getLoc(), largest); Value cstSorted = rewriter.create(binder.getLoc(), sorted); Value kValueInt = rewriter.create( binder.getLoc(), rewriter.getType(), kValue); rewriter.replaceOpWithNewOp( binder.op, Values_type, Indices_type, input, kValueInt, cstAxis, cstLargest, cstSorted); return success(); }); patterns.onOp("Sign", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value operand; if (binder.tensorOperand(operand) || binder.tensorResultType(resultType)) return failure(); rewriter.replaceOpWithNewOp( binder.op, resultType, operand); return success(); }); patterns.onOp( "Softplus", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value input; if (binder.tensorOperand(input) || binder.tensorResultType(resultType)) { return failure(); } // out = ln(exp(x) + 1) Value exp = rewriter.create(binder.getLoc(), resultType, input); rewriter.replaceOpWithNewOp(binder.op, resultType, exp); return success(); }); patterns.onOp("Softsign", 22, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value input; if (binder.tensorOperand(input) || binder.tensorResultType(resultType)) { return failure(); } Value absX = rewriter.create( binder.getLoc(), resultType, input); Value constOne = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(1)); Value absXPlusOne = rewriter.create( binder.getLoc(), resultType, absX, constOne, constOne); rewriter.replaceOpWithNewOp( binder.op, resultType, input, absXPlusOne); return success(); }); patterns.onOp( "Trilu", 14, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value input; int64_t upper; if (binder.tensorOperandAtIndex(input, 0) || binder.s64IntegerAttr(upper, "upper", 1) || binder.tensorResultType(resultType)) { return failure(); } Value diagonal; if (binder.tensorOperandAtIndex(diagonal, 1)) { diagonal = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(0)); } else { diagonal = rewriter.create( binder.getLoc(), rewriter.getType(), diagonal); } if (upper) { rewriter.replaceOpWithNewOp(binder.op, resultType, input, diagonal); return success(); } rewriter.replaceOpWithNewOp(binder.op, resultType, input, diagonal); return success(); }); patterns.onOp("ThresholdedRelu", 10, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value input; float alpha; if (binder.tensorOperand(input) || binder.f32FloatAttr(alpha, "alpha", 1.0) || binder.tensorResultType(resultType)) { return failure(); } Value cstAlpha = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), alpha)); Value value = 
rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), 0.0)); rewriter.replaceOpWithNewOp( binder.op, resultType, input, cstAlpha, value); return success(); }); patterns.onOp( "RandomNormal", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { SmallString<64> name("torch.onnx.seed"); auto seedAttr = binder.op->getAttr(name); if (seedAttr) return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for seed attribute"); Torch::ValueTensorType resultType; int64_t dtypeIntOnnx; float mean, scale; SmallVector shape; if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) || binder.f32FloatAttr(mean, "mean", 0.0) || binder.f32FloatAttr(scale, "scale", 1.0) || binder.s64IntegerArrayAttr(shape, "shape", {}) || binder.tensorResultType(resultType)) { return failure(); } std::optional dtypeIntTorch = onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx); if (!dtypeIntTorch.has_value()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented support for the given dtype conversion"); } Value constDtype = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value())); Value shapeList = createConstantIntList(binder, rewriter, shape); Value cstNone = rewriter.create(binder.getLoc()); Value self = rewriter.create( binder.op->getLoc(), resultType, shapeList, /*dtype=*/constDtype, /*layout=*/cstNone, /*device=*/cstNone, /*pinMemory=*/cstNone, /*memoryFormat=*/cstNone); Value cstMean = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), mean)); Value cstStd = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), scale)); rewriter.replaceOpWithNewOp( binder.op, resultType, self, cstMean, cstStd, /*generator=*/cstNone); return success(); }); patterns.onOp( "RandomNormalLike", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { SmallString<64> name("torch.onnx.seed"); auto seedAttr = binder.op->getAttr(name); if (seedAttr) return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for seed attribute"); Torch::ValueTensorType resultType; int64_t dtypeIntOnnx; float mean, scale; SmallVector shape; Value input; if (binder.tensorOperand(input) || binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) || binder.f32FloatAttr(mean, "mean", 0.0) || binder.f32FloatAttr(scale, "scale", 1.0) || binder.tensorResultType(resultType)) { return failure(); } std::optional dtypeIntTorch = onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx); if (!dtypeIntTorch.has_value()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented support for the given dtype conversion"); } Value constDtype = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value())); Value cstNone = rewriter.create(binder.getLoc()); Value cstFalse = rewriter.create(binder.getLoc(), false); input = rewriter.create( binder.op->getLoc(), resultType, input, constDtype, /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/cstNone); Value cstMean = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), mean)); Value cstStd = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), scale)); rewriter.replaceOpWithNewOp( binder.op, resultType, input, cstMean, cstStd, /*generator=*/cstNone); return success(); }); patterns.onOp( "RandomUniform", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { SmallString<64> name("torch.onnx.seed"); auto 
seedAttr = binder.op->getAttr(name); if (seedAttr) return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for seed attribute"); Torch::ValueTensorType resultType; int64_t dtypeIntOnnx; float high, low; SmallVector shape; if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) || binder.f32FloatAttr(high, "high", 1.0) || binder.f32FloatAttr(low, "low", 0.0) || binder.s64IntegerArrayAttr(shape, "shape", {}) || binder.tensorResultType(resultType)) { return failure(); } std::optional dtypeIntTorch = onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx); if (!dtypeIntTorch.has_value()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented support for the given dtype conversion"); } Value constDtype = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value())); Value shapeList = createConstantIntList(binder, rewriter, shape); Value cstNone = rewriter.create(binder.getLoc()); Value self = rewriter.create( binder.op->getLoc(), resultType, shapeList, /*dtype=*/constDtype, /*layout=*/cstNone, /*device=*/cstNone, /*pinMemory=*/cstNone, /*memoryFormat=*/cstNone); Value cstHigh = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), high)); Value cstLow = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), low)); rewriter.replaceOpWithNewOp( binder.op, resultType, self, cstLow, cstHigh, /*generator=*/cstNone); return success(); }); patterns.onOp( "RandomUniformLike", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { SmallString<64> name("torch.onnx.seed"); auto seedAttr = binder.op->getAttr(name); if (seedAttr) return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for seed attribute"); Torch::ValueTensorType resultType; int64_t dtypeIntOnnx; float high, low; SmallVector shape; Value input; if (binder.tensorOperand(input) || binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) || binder.f32FloatAttr(high, "high", 1.0) || binder.f32FloatAttr(low, "low", 0.0) || binder.tensorResultType(resultType)) { return failure(); } std::optional dtypeIntTorch = onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx); if (!dtypeIntTorch.has_value()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented support for the given dtype conversion"); } Value constDtype = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value())); Value cstNone = rewriter.create(binder.getLoc()); Value cstFalse = rewriter.create(binder.getLoc(), false); input = rewriter.create( binder.op->getLoc(), resultType, input, constDtype, /*non_blocking=*/cstFalse, /*copy=*/cstFalse, /*memory_format=*/cstNone); Value cstHigh = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), high)); Value cstLow = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), low)); rewriter.replaceOpWithNewOp( binder.op, resultType, input, cstLow, cstHigh, /*generator=*/cstNone); return success(); }); patterns.onOp( "SoftmaxCrossEntropyLoss", 12, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; int64_t ignoreIndex; std::string reduction; SmallVector shape; Value scores, labels, weight; if (binder.tensorOperandAtIndex(scores, 0) || binder.tensorOperandAtIndex(labels, 1) || binder.s64IntegerAttr(ignoreIndex, "ignore_index", -100) || binder.customOpNameStringAttr(reduction, "reduction", "mean") || binder.tensorResultTypeAtIndex(resultType, 0)) { 
return failure(); } if (binder.tensorOperandAtIndex(weight, 2)) weight = rewriter.create(binder.getLoc()); Value cstIgnoreIndex = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(ignoreIndex)); int64_t reductionInt = reduction == "none" ? 0 : reduction == "mean" ? 1 : 2; Value cstReductionInt = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(reductionInt)); // The default PyTorch value for label smoothing is "0.0". // Refer: // https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html Value cstLabelSmoothing = rewriter.create( binder.getLoc(), rewriter.getType(), rewriter.getFloatAttr(rewriter.getF64Type(), 0.0)); Value loss = rewriter.create( binder.getLoc(), resultType, scores, labels, weight, cstReductionInt, cstIgnoreIndex, cstLabelSmoothing); if (binder.op->getNumResults() == 1) { rewriter.replaceOp(binder.op, loss); return success(); } Torch::ValueTensorType resultTypeLogProb; if (binder.tensorResultTypeAtIndex(resultTypeLogProb, 1)) return failure(); Value dim = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(1)); Value cstNone = rewriter.create(binder.getLoc()); Value logProb = rewriter.create( binder.getLoc(), resultTypeLogProb, scores, dim, /*dtype=*/cstNone); rewriter.replaceOp(binder.op, {loss, logProb}); return success(); }); patterns.onOp( "Resize", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; llvm::SmallVector operands; std::string mode, nearest_mode, coordTfMode; int64_t antialias, exclude_outside; float extrapolation_value; Value noneVal = rewriter.create(binder.getLoc()); if (auto attr = binder.op->getAttr("torch.onnx.axes")) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for axes attribute"); } if (auto attr = binder.op->getAttr("torch.onnx.keep_aspect_ratio_policy")) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for " "keep_aspect_ratio_policy attribute"); } if (binder.tensorOperandsList(operands) || binder.tensorResultType(resultType) || binder.customOpNameStringAttr(mode, "mode", "nearest") || binder.customOpNameStringAttr( coordTfMode, "coordinate_transformation_mode", "half_pixel") || binder.s64IntegerAttr(antialias, "antialias", 0) || binder.s64IntegerAttr(exclude_outside, "exclude_outside", 0) || binder.f32FloatAttr(extrapolation_value, "extrapolation_value", 0.0) || binder.customOpNameStringAttr(nearest_mode, "nearest_mode", "round_prefer_floor")) return failure(); if (antialias != 0) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for antialias attribute"); } if (exclude_outside != 0) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for " "exclude_outside attribute"); } if (extrapolation_value != 0.0) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for " "extrapolation_value attribute"); } if (coordTfMode == "tf_crop_and_resize") return rewriter.notifyMatchFailure( binder.op, "unimplemented: coordinate transformation mode: " "tf_crop_and_resize"); if (mode == "nearest" && coordTfMode != "asymmetric" && coordTfMode != "half_pixel") { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support not present for coord tf mode " "except asymmetric and half_pixel"); } unsigned rank = dyn_cast(operands[0].getType()) .getSizes() .size(); Value cstFalse = rewriter.create(binder.getLoc(), false); Value cstTrue = rewriter.create(binder.getLoc(), true); Value modeStrValue; 
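        // The ONNX (mode, coordinate_transformation_mode) pair is folded into
        // a single torch interpolation mode string below; e.g. (illustrative)
        // mode = "linear" with coordTfMode = "pytorch_half_pixel" on a rank-4
        // input becomes "bilinear_pytorch_half_pixel".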
Value scalesValueList = noneVal; Value sizesValueList = noneVal; Value alignCorners = coordTfMode == "align_corners" ? cstTrue : cstFalse; if (mode == "cubic") { return rewriter.notifyMatchFailure(binder.op, "unimplemented: bicubic mode"); } // supported modes: // bilinear (half_pixel), bilinear with align_corners, // bilinear_pytorch_half_pixel, bilinear_asymmetric nearest // (asymmetric), nearest with align_corners, nearest_half_pixel, // nearest_pytorch_half_pixel if (mode == "linear") { std::string modeStr; switch (rank) { case 3: modeStr = "linear"; break; case 4: modeStr = "bilinear"; break; case 5: modeStr = "trilinear"; break; default: return failure(); } // Confusingly enough, the default coordTfMode for pytorch bilinear // mode is apparently half_pixel, NOT pytorch_half_pixel if (coordTfMode != "half_pixel" && coordTfMode != "align_corners") modeStr = (modeStr + "_") + coordTfMode; modeStrValue = rewriter.create(binder.getLoc(), modeStr); } if (mode == "nearest") { std::string modeStr = "nearest"; // The default coordTfMode for pytorch with mode = nearest is // apparently asymmetric if (coordTfMode != "asymmetric" && coordTfMode != "align_corners") modeStr = (modeStr + "_") + coordTfMode; if (nearest_mode != "floor" && nearest_mode != "") modeStr = modeStr + "," + nearest_mode; modeStrValue = rewriter.create(binder.getLoc(), modeStr); } if (operands.size() < 4) { Value scaleOperand = operands[2]; scalesValueList = getValueList(binder, rewriter, scaleOperand); sizesValueList = noneVal; } else { Value sizeOperand = operands[3]; scalesValueList = noneVal; sizesValueList = getValueList(binder, rewriter, sizeOperand); } if (isa(scalesValueList.getType()) && isa(sizesValueList.getType())) { return rewriter.notifyMatchFailure(binder.op, "unknown scaling mode"); } rewriter .replaceOpWithNewOp( binder.op, resultType, operands[0], sizesValueList, scalesValueList, modeStrValue, /* AnyTorchOptionalBoolType:$align_corners */ alignCorners, /* AnyTorchOptionalBoolType:$recompute_scale_factor */ noneVal, /*Torch_BoolType:$antialias*/ cstFalse); return success(); }); patterns.onOp( "RoiAlign", 16, [](OpBinder binder, ConversionPatternRewriter &rewriter) { // operands = input, rois, batch_indices SmallVector operands; std::string coordTfMode, mode; int64_t outHInt, outWInt, samplingRatioInt; float spatialScaleFloat; Torch::ValueTensorType resultType; if (binder.tensorOperands(operands, 3) || binder.customOpNameStringAttr( coordTfMode, "coordinate_transformation_mode", "half_pixel") || binder.customOpNameStringAttr(mode, "mode", "avg") || binder.s64IntegerAttr(outHInt, "output_height", 1) || binder.s64IntegerAttr(outWInt, "output_width", 1) || binder.s64IntegerAttr(samplingRatioInt, "sampling_ratio", 0) || binder.f32FloatAttr(spatialScaleFloat, "spatial_scale", 1.0f) || binder.tensorResultType(resultType)) return failure(); Value input = operands[0]; Value rois = operands[1]; Value batchIndices = operands[2]; // the torchvision roi_pool op does not support these features: if (mode == "max" && (coordTfMode != "half_pixel" || samplingRatioInt != 0)) return rewriter.notifyMatchFailure( binder.op, "unsupported: roi max pooling without default " "coordTfMode and sampling_ratio"); Location loc = binder.getLoc(); // concatenate the batchIndices to the rois to get rois as a num_roisx5 // tensor. The batchIndices tensor is an int64 tensor, and needs to be // converted to float before concatenation. 
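        // Illustrative shapes (assumed example): rois : [N,4] f32 and
        // batch_indices : [N] si64 become unsqueezed indices [N,1], cast to
        // f32, and concatenated along dim 1 into the [N,5] layout the
        // torchvision ops expect: (batch_idx, x1, y1, x2, y2).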
auto roisType = dyn_cast(rois.getType()); if (!roisType || !roisType.hasSizes()) return failure(); Value cstDim = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(1)); FailureOr unsqueezeIndices = Torch::unsqueezeTensor(rewriter, binder.op, batchIndices, cstDim); if (failed(unsqueezeIndices)) return failure(); batchIndices = unsqueezeIndices.value(); auto batchIndicesType = cast(batchIndices.getType()); Value dTypeInt = Torch::getDtypeIntValueForType(rewriter, loc, roisType.getDtype()); Value none = rewriter.create(binder.getLoc()); Value cstFalse = rewriter.create(binder.getLoc(), false); Value newBatchIndices = rewriter.create( loc, batchIndicesType.getWithSizesAndDtype( batchIndicesType.getOptionalSizes(), roisType.getOptionalDtype()), batchIndices, dTypeInt, cstFalse, cstFalse, none); SmallVector roiSizes(roisType.getSizes()); roiSizes.back() = 5; auto catType = rewriter.getType( roiSizes, roisType.getDtype()); Type listElemType = roisType.getWithSizesAndDtype(/*optionalSizes=*/std::nullopt, /*optionalDtype=*/nullptr); Type listType = Torch::ListType::get(listElemType); Value tensorList = rewriter.create( binder.op->getLoc(), listType, ValueRange{newBatchIndices, rois}); Value newRois = rewriter.create(loc, catType, tensorList, cstDim); // make constants from attributes Value cstSpatialScale = rewriter.create( loc, rewriter.getF64FloatAttr(spatialScaleFloat)); Value pooledHeight = rewriter.create( loc, rewriter.getI64IntegerAttr(outHInt)); Value pooledWidth = rewriter.create( loc, rewriter.getI64IntegerAttr(outWInt)); // this is for consistency with the default pytorch sampling ratio value samplingRatioInt = (samplingRatioInt == 0) ? -1 : samplingRatioInt; Value samplingRatio = rewriter.create( loc, rewriter.getI64IntegerAttr(samplingRatioInt)); bool aligned = coordTfMode == "half_pixel"; Value cstAligned = rewriter.create(loc, aligned); if (mode == "avg") { rewriter.replaceOpWithNewOp( binder.op, resultType, input, newRois, cstSpatialScale, pooledHeight, pooledWidth, samplingRatio, cstAligned); return success(); } // mode == "max" auto indicesType = resultType.getWithSizesAndDtype( resultType.getOptionalSizes(), batchIndicesType.getDtype()); auto roiPool = rewriter.create( loc, TypeRange{resultType, indicesType}, input, newRois, cstSpatialScale, pooledHeight, pooledWidth); rewriter.replaceOp(binder.op, roiPool.getResult(0)); return success(); }); patterns.onOp( "SpaceToDepth", 1, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value input; int64_t blockSize; std::string mode; if (binder.tensorOperand(input) || binder.s64IntegerAttr(blockSize, "blocksize") || binder.customOpNameStringAttr(mode, "mode", "DCR") || binder.tensorResultType(resultType)) return failure(); auto inputTy = dyn_cast(input.getType()); if (!inputTy || !inputTy.hasSizes()) { return rewriter.notifyMatchFailure( binder.op, "Expected input type having sizes"); } SmallVector inputSizes{inputTy.getSizes()}; if (inputSizes.size() != 4) { return rewriter.notifyMatchFailure(binder.op, "Expected input rank to be 4"); } Value b = rewriter.create( binder.getLoc(), input, rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(0))); Value c = rewriter.create( binder.getLoc(), input, rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(1))); Value h = rewriter.create( binder.getLoc(), input, rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(2))); Value w = rewriter.create( binder.getLoc(), input, rewriter.create( binder.getLoc(), 
rewriter.getI64IntegerAttr(3))); Value cstBlockSize = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(blockSize)); Value cstBlockSizeSquare = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(blockSize * blockSize)); Value hDivBlockSize = rewriter.create( binder.getLoc(), h, cstBlockSize); Value wDivBlockSize = rewriter.create( binder.getLoc(), w, cstBlockSize); hDivBlockSize = rewriter.create(binder.getLoc(), hDivBlockSize); wDivBlockSize = rewriter.create(binder.getLoc(), wDivBlockSize); // The implementation is as follows: // tmp = np.reshape( // x, [b, c, h // blocksize, blocksize, w // blocksize, blocksize] // ) // tmp = np.transpose(tmp, [0, 3, 5, 1, 2, 4]) // y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize, w // // blocksize]) Value reshapeSizesList = rewriter.create( binder.getLoc(), Torch::ListType::get(Torch::IntType::get(input.getContext())), llvm::SmallVector{b, c, hDivBlockSize, cstBlockSize, wDivBlockSize, cstBlockSize}); int64_t hDivBlockSizeInt = inputSizes[2] == Torch::kUnknownSize ? Torch::kUnknownSize : inputSizes[2] / blockSize; int64_t wDivBlockSizeInt = inputSizes[3] == Torch::kUnknownSize ? Torch::kUnknownSize : inputSizes[3] / blockSize; SmallVector reshapeSizesInt{inputSizes[0], inputSizes[1], hDivBlockSizeInt, blockSize, wDivBlockSizeInt, blockSize}; Value reshapedInput = rewriter.create( binder.getLoc(), inputTy.getWithSizesAndDtype(reshapeSizesInt, inputTy.getOptionalDtype()), input, reshapeSizesList); SmallVector permuteDimsInt{0, 3, 5, 1, 2, 4}; Value permutedInput; if (failed(createTorchPermuteOp(binder, rewriter, binder.getLoc(), reshapedInput, permuteDimsInt, permutedInput))) return rewriter.notifyMatchFailure( binder.op, "Failed to create Torch Permute op"); Value cMulBlockSizeSquare = rewriter.create( binder.getLoc(), c, cstBlockSizeSquare); reshapeSizesList = rewriter.create( binder.getLoc(), Torch::ListType::get(Torch::IntType::get(input.getContext())), llvm::SmallVector{b, cMulBlockSizeSquare, hDivBlockSize, wDivBlockSize}); rewriter.replaceOpWithNewOp( binder.op, resultType, permutedInput, reshapeSizesList); return success(); }); patterns.onOp( "Shrink", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Location loc = binder.getLoc(); Torch::ValueTensorType resultType; Value input; float bias, lambd; if (binder.tensorOperand(input) || binder.f32FloatAttr(bias, "bias", 0.0) || binder.f32FloatAttr(lambd, "lambd", 0.5) || binder.tensorResultType(resultType)) { return failure(); } Torch::ValueTensorType inputType = cast(input.getType()); if (!isa(inputType.getDtype())) return rewriter.notifyMatchFailure( binder.op, "unimplemented: non-floating point dtype"); Torch::ValueTensorType comparisonResultType = rewriter.getType( ArrayRef(inputType.getSizes()), rewriter.getI1Type()); // The formula of this operator is: If x < -lambd, y = x + bias; If x > // lambd, y = x - bias; Otherwise, y = 0. 
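        // Worked example (illustrative, with lambd = 0.5, bias = 1.5):
        //   x = -2.0: x < -lambd, so y = x + bias = -0.5
        //   x =  0.2: -lambd <= x <= lambd, so y = 0
        //   x =  3.0: x > lambd, so y = x - bias = 1.5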
// The implementation is based on the following algorithm: // Shrink (input) => (output) // { // Lambd = Constant () // LambdCast = CastLike (Lambd, input) // Bias = Constant () // BiasCast = CastLike (Bias, input) // Zero = Constant () // ZeroCast = CastLike (Zero, input) // NegLmbda = Neg (LambdCast) // InputLessThanNegLambda = Less (input, NegLmbda) // InputAddBias = Add (input, BiasCast) // InputSubBias = Sub (input, BiasCast) // LambdaLessThanInput = Less (LambdCast, input) // InputSubBiasOrZero = Where (LambdaLessThanInput, InputSubBias, // ZeroCast) output = Where (InputLessThanNegLambda, InputAddBias, // InputSubBiasOrZero) // } Value constLambd = rewriter.create( loc, rewriter.getFloatAttr(rewriter.getF64Type(), lambd)); Value constBias = rewriter.create( loc, rewriter.getFloatAttr(rewriter.getF64Type(), bias)); Value constZero = rewriter.create( loc, rewriter.getFloatAttr(rewriter.getF64Type(), 0.0)); Value constOne = rewriter.create( loc, rewriter.getFloatAttr(rewriter.getF64Type(), 1.0)); Value constNegLambd = rewriter.create( loc, rewriter.getFloatAttr(rewriter.getF64Type(), -lambd)); Value inputLTNegLambd = rewriter.create( loc, comparisonResultType, input, constNegLambd); Value inputPlusBias = rewriter.create( loc, inputType, input, constBias, /*alpha=*/constOne); Value inputSubBias = rewriter.create( loc, inputType, input, constBias, /*alpha=*/constOne); Value inputGTLambd = rewriter.create( loc, comparisonResultType, input, constLambd); Value inputSubBiasOrZero = rewriter.create( loc, resultType, inputGTLambd, inputSubBias, constZero); rewriter.replaceOpWithNewOp( binder.op, resultType, inputLTNegLambd, inputPlusBias, inputSubBiasOrZero); return success(); }); patterns.onOp("SequenceAt", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; Value inputSequence, position; if (binder.tensorListOperandAtIndex(inputSequence, 0) || binder.tensorOperandAtIndex(position, 1) || binder.tensorResultType(resultType)) return failure(); Value index = rewriter.create( binder.getLoc(), rewriter.getType(), position); rewriter.replaceOpWithNewOp( binder.op, resultType, inputSequence, index); return success(); }); patterns.onOp( "SequenceEmpty", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ListType resultType; int64_t dtypeIntOnnx; if (binder.s64IntegerAttr(dtypeIntOnnx, "dtype", 1) || binder.tensorListResultType(resultType)) return failure(); std::optional dtypeIntTorch = onnxDtypeIntToTorchDtypeInt(dtypeIntOnnx); if (!dtypeIntTorch.has_value()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented support for the given dtype conversion"); } Value constDtype = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(dtypeIntTorch.value())); Value shapeList = createConstantIntList(binder, rewriter, {}); Value cstNone = rewriter.create(binder.getLoc()); Value self = rewriter.create( binder.op->getLoc(), resultType.getContainedType(), shapeList, /*dtype=*/constDtype, /*layout=*/cstNone, /*device=*/cstNone, /*pinMemory=*/cstNone, /*memoryFormat=*/cstNone); rewriter.replaceOpWithNewOp( binder.op, resultType, llvm::SmallVector{self}); return success(); }); patterns.onOp( "SequenceErase", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ListType resultType; Value inputSequence, position; if (binder.tensorListOperandAtIndex(inputSequence, 0) || binder.tensorListResultType(resultType)) return failure(); Value length = rewriter.create( binder.getLoc(), rewriter.getType(), inputSequence); Value 
cstNone = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value cstOne = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(1));
        if (binder.op->getNumOperands() == 1) {
          // If true, it means that the `position` arg is missing and
          // the last tensor from the list has to be erased.
          Value lengthMinusOne = rewriter.create<Torch::AtenSubIntOp>(
              binder.getLoc(), length, cstOne);
          rewriter.replaceOpWithNewOp<Torch::AtenSliceTOp>(
              binder.op, resultType, inputSequence, /*start=*/cstNone,
              /*end=*/lengthMinusOne, /*step=*/cstOne);
          return success();
        }

        if (binder.tensorOperandAtIndex(position, 1))
          return failure();

        Value positionInt = rewriter.create<Torch::AtenItemOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), position);
        // Handle a negative position value.
        Value cstZero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(0));
        Value isPositionNegative = rewriter.create<Torch::AtenLtIntOp>(
            binder.getLoc(), positionInt, cstZero);
        isPositionNegative = rewriter.create<Torch::AtenIntBoolOp>(
            binder.getLoc(), isPositionNegative);
        Value finalOffset = rewriter.create<Torch::AtenMulIntOp>(
            binder.getLoc(), isPositionNegative, length);
        positionInt = rewriter.create<Torch::AtenAddIntOp>(
            binder.getLoc(), positionInt, finalOffset);

        Value listBeforePosition = rewriter.create<Torch::AtenSliceTOp>(
            binder.getLoc(), resultType, inputSequence, /*start=*/cstNone,
            /*end=*/positionInt, /*step=*/cstOne);
        Value positionPlusOne = rewriter.create<Torch::AtenAddIntOp>(
            binder.getLoc(), positionInt, cstOne);
        Value listAfterPosition = rewriter.create<Torch::AtenSliceTOp>(
            binder.getLoc(), resultType, inputSequence,
            /*start=*/positionPlusOne, /*end=*/length, /*step=*/cstOne);

        rewriter.replaceOpWithNewOp<Torch::AtenAddTOp>(
            binder.op, resultType, listBeforePosition, listAfterPosition);
        return success();
      });
  patterns.onOp(
      "SequenceInsert", 11,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ListType resultType;
        Value inputSequence, position, insertValue;
        if (binder.tensorListOperandAtIndex(inputSequence, 0) ||
            binder.tensorOperandAtIndex(insertValue, 1) ||
            binder.tensorListResultType(resultType))
          return failure();

        if (binder.op->getNumOperands() == 2) {
          // The operands are (sequence, tensor[, position]), so exactly two
          // operands means that the `position` arg is missing and
          // the tensor has to be inserted at the end of the list.
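          // E.g. (illustrative): for a three-tensor sequence [t0, t1, t2],
          // aten.insert.t at idx = len(seq) = 3 appends, yielding
          // [t0, t1, t2, new].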
          Value length = rewriter.create<Torch::AtenLenTOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(),
              inputSequence);
          // aten.insert.t mutates the list in place and produces no result,
          // so insert and then replace the op with the updated sequence.
          rewriter.create<Torch::AtenInsertTOp>(binder.getLoc(), inputSequence,
                                                /*idx=*/length,
                                                /*el=*/insertValue);
          rewriter.replaceOp(binder.op, inputSequence);
          return success();
        }

        if (binder.tensorOperandAtIndex(position, 2))
          return failure();

        Value positionInt = rewriter.create<Torch::AtenItemOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), position);
        rewriter.create<Torch::AtenInsertTOp>(binder.getLoc(), inputSequence,
                                              /*idx=*/positionInt,
                                              /*el=*/insertValue);
        rewriter.replaceOp(binder.op, inputSequence);
        return success();
      });
  patterns.onOp(
      "SequenceMap", 17,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        llvm::SmallVector<Value> operands;
        Torch::ListType resultType;
        if (binder.tensorOperandsList(operands) || operands.size() == 0 ||
            binder.tensorListResultType(resultType)) {
          return failure();
        }
        Region *bodyRegion;
        if (binder.getRegionAtIndex(bodyRegion, 0)) {
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed getting Body Region");
        }

        // Construct an empty list; results are appended through the loop.
        auto resultTensorType =
            dyn_cast<Torch::ValueTensorType>(resultType.getContainedType());
        Value shapeList = createConstantIntList(binder, rewriter,
                                                resultTensorType.getSizes());
        Value cstNone =
            rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value self = rewriter.create<Torch::AtenEmptyMemoryFormatOp>(
            binder.op->getLoc(), resultType.getContainedType(), shapeList,
            /*dtype=*/cstNone, /*layout=*/cstNone, /*device=*/cstNone,
            /*pinMemory=*/cstNone, /*memoryFormat=*/cstNone);
        Value result = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(), resultType, llvm::SmallVector<Value>{self});

        // Create a for-like PrimLoopOp with the length of the sequence as
        // the max iteration count.
        Value len = rewriter.create<Torch::AtenLenTOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(), operands[0]);
        auto cstTrue = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(true));
        mlir::ImplicitLocOpBuilder b(binder.getLoc(), rewriter);
        auto loop =
            b.create<Torch::PrimLoopOp>(resultType, len, cstTrue, result);
        rewriter.cloneRegionBefore(*bodyRegion, loop.getRegion(),
                                   loop.getRegion().begin());

        // PrimLoopOp's loopBody expects torch.int as its first argument;
        // remove the inputs from the region and use them from outside.
        loop.getRegion().front().insertArgument(0U, resultType,
                                                binder.getLoc());
        Value sequenceArg = loop.getRegion().front().getArgument(0);
        loop.getRegion().front().insertArgument(
            0U, rewriter.getType<Torch::IntType>(), binder.getLoc());
        Value indexArg = loop.getRegion().front().getArgument(0);

        // Get sequence[i] (and additionalInput[i]) in each iteration.
        rewriter.setInsertionPointToStart(&loop.getRegion().front());
        for (size_t i = 0; i < operands.size(); i++) {
          Value argInput = loop.getRegion().front().getArgument(2);
          if (isa<Torch::ListType>(operands[i].getType())) {
            auto tensorType = dyn_cast<Torch::ValueTensorType>(
                dyn_cast<Torch::ListType>(operands[i].getType())
                    .getContainedType());
            Value item = rewriter.create<Torch::Aten__Getitem__TOp>(
                binder.getLoc(), tensorType, operands[i], indexArg);
            argInput.replaceAllUsesWith(item);
          } else {
            argInput.replaceAllUsesWith(operands[i]);
          }
          loop.getRegion().eraseArgument(2);
        }

        // Replace the terminator.
        PatternRewriter::InsertionGuard guard(rewriter);
        Operation *terminator = loop.getRegion().front().getTerminator();
        rewriter.setInsertionPoint(terminator);
        // Update the sequence input.
        auto terminatorOperands = terminator->getOperands();
        Value append = rewriter.create<Torch::AtenAppendTOp>(
            binder.getLoc(), resultType, sequenceArg, terminatorOperands[0]);
        rewriter.replaceOpWithNewOp<Torch::PrimLoopConditionOp>(
            terminator, cstTrue, append);

        rewriter.replaceOp(binder.op, loop);
        return success();
      });
  patterns.onOp(
      "Upsample", 9, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        std::string mode;
        Value input, scales;
        if (binder.tensorOperands(input, scales) ||
            binder.customOpNameStringAttr(mode, "mode", "nearest") ||
            binder.tensorResultType(resultType)) {
          return failure();
        }

        if (mode != "nearest" && mode != "linear")
          return rewriter.notifyMatchFailure(
              binder.op,
              "unsupported interpolation mode other than nearest, linear");

        int64_t resultRank = resultType.getSizes().size();
        if (resultRank > 5)
          return rewriter.notifyMatchFailure(
              binder.op, "supports up to 3d upsampling only");

        Value scalesValueList = getValueList(binder, rewriter, scales);
        if (mode == "linear") {
          if (resultRank == 4)
            mode = "bilinear";
          if (resultRank == 5)
            mode = "trilinear";
        }
        Value modeStrValue =
            rewriter.create<Torch::ConstantStrOp>(binder.getLoc(), mode);
        Value cstNone =
            rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(false));

        rewriter
            .replaceOpWithNewOp<Torch::Aten__InterpolateSizeListScaleListOp>(
                binder.op, resultType, input, /*size=*/cstNone,
                scalesValueList, modeStrValue,
                /* AnyTorchOptionalBoolType:$align_corners */ cstNone,
                /* AnyTorchOptionalBoolType:$recompute_scale_factor */ cstNone,
                /*Torch_BoolType:$antialias*/ cstFalse);
        return success();
      });
  patterns.onOp(
      "STFT", 17, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        // operands in order -> (signal, frameStep, window, frameLength*)
        SmallVector<Value> operands;
        int64_t onesided;
        Torch::ValueTensorType resultType;

        if (binder.tensorOperandsList(operands) ||
            binder.s64IntegerAttr(onesided, "onesided", 1) ||
            binder.tensorResultType(resultType))
          return failure();

        Value signal = operands[0];
        Value frameStep = operands[1];
        auto signalTy = cast<Torch::ValueTensorType>(signal.getType());
        auto signalShape = signalTy.getSizes();
        auto resultShape = resultType.getSizes();

        // There are two possible cases for the optional inputs frameLength
        // and window: either 4 operands will be passed, with window being
        // !torch.none, or 3 operands will be passed, with window present and
        // frameLength absent. In the former case, we simply create a
        // rectangular window consisting of ones, and in the latter, we set
        // frameLength equal to inputShape[-2] or windowShape[0], depending
        // on whether window was present or not. Note that it is possible for
        // both window and frameLength to be none, which would mean that
        // either only two operands were passed, or, in the case of three
        // operands, window was passed in as none and frameLength was absent.
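        // Summary of the cases handled below (illustrative):
        //   window absent/none, frameLength absent -> frameLength from the
        //       signal's second-to-last dim, window = ones(frameLength)
        //   window given,       frameLength absent -> frameLength =
        //       windowShape[0]
        //   window absent/none, frameLength given  -> window =
        //       ones(frameLength)
        //   both given                             -> used as-is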
Value window = nullptr, frameLength = nullptr; bool windowIsNone = true, frameLengthIsNone = true; if (operands.size() == 3) { window = operands[2]; windowIsNone = isa(window.getType()); } if (operands.size() == 4) { window = operands[2]; frameLength = operands[3]; windowIsNone = isa(window.getType()); frameLengthIsNone = isa(frameLength.getType()); } ArrayRef windowShape; if (frameLengthIsNone) { if (windowIsNone) { frameLength = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr( signalShape[signalShape.size() - 2])); } else { windowShape = cast(window.getType()).getSizes(); frameLength = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(windowShape[0])); } } Value frameLengthItem; if (!frameLengthIsNone || windowIsNone) { frameLengthItem = getItemOp(binder, rewriter, frameLength); } else { frameLengthItem = frameLength; } Value frameStepItem = getItemOp(binder, rewriter, frameStep); if (windowIsNone) { auto onesResultTy = rewriter.getType( ArrayRef({-1}), signalTy.getDtype()); Value none = rewriter.create(binder.getLoc()); Value sizes = rewriter.create( binder.getLoc(), Torch::ListType::get( Torch::IntType::get(binder.op->getContext())), SmallVector{frameLengthItem}); window = rewriter.create( binder.getLoc(), onesResultTy, sizes, none, none, none, none); } FailureOr complexDtype; if (signalTy.getDtype().isBF16()) { return rewriter.notifyMatchFailure( binder.op, "unimplemented: support for bfloat16 type is unimplemented."); } if (signalTy.getDtype().isF16()) { complexDtype = Torch::getTypeForScalarType( binder.op->getContext(), torch::torch_upstream::ScalarType::ComplexHalf); } else if (signalTy.getDtype().isF32()) { complexDtype = Torch::getTypeForScalarType( binder.op->getContext(), torch::torch_upstream::ScalarType::ComplexFloat); } else { complexDtype = Torch::getTypeForScalarType( binder.op->getContext(), torch::torch_upstream::ScalarType::ComplexDouble); } auto complexSignalTy = rewriter.getType( ArrayRef({signalShape[0], signalShape[1]}), complexDtype.value()); // The onnx STFT op always passes in a float input, and if the input // is intended to be complex, its shape will be [batch][length][2], // where [...][0] is the real component, and [...][1] is the complex // component. This complex input has to be made torch compatible before // being passed into torch.stft, so it is necessary to call // AtenViewAsComplexOp. In case of real input, the shape of the signal // will be [batch][length][1], and therefore it will have to be squeezed // at dim=2, before being passed into torch.stft. if (signalShape[2] == 2) { signal = rewriter.create( binder.getLoc(), complexSignalTy, signal); } else { Value two = rewriter.create( binder.getLoc(), rewriter.getI64IntegerAttr(2)); auto newSignalTy = signalTy.getWithSizesAndDtype( ArrayRef({signalShape[0], signalShape[1]}), signalTy.getDtype()); signal = rewriter.create( binder.getLoc(), newSignalTy, signal, two); } // In case the window is not given, we use frameLength // as the length of the window. 
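        // Shape sketch (illustrative, assuming signal [B, L], n_fft =
        // frameLength, hop_length = frameStep, onesided = 1): torch.stft
        // produces a complex tensor of shape [B, n_fft/2 + 1, num_frames];
        // the permute further below reorders this to the ONNX
        // [B, num_frames, n_fft/2 + 1] layout before view_as_real appends
        // the trailing size-2 real/imag dim.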
        Value windowLen;
        if (!windowIsNone) {
          windowLen = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(windowShape[0]));
        } else {
          windowLen = frameLengthItem;
        }

        Value falseVal =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), false);
        Value trueVal =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);
        auto stftTy = complexSignalTy.getWithSizesAndDtype(
            ArrayRef<int64_t>(
                {resultShape[0], resultShape[2], resultShape[1]}),
            complexSignalTy.getDtype());

        // After torch.stft is called and the result is stored into the value
        // stft, there is one thing to note: the resultType for the onnx op
        // will have shape [batch][num_frames][length][2], while the shape of
        // stft will be [batch][length][num_frames]. Before the value is
        // converted to real through torch.view_as_real, we must permute the
        // shape of stft to match the shape of resultType. Also, it is
        // immaterial whether torch.view_as_real is called before or after the
        // permutation; both outputs will be equivalent.
        Value stft = rewriter.create<Torch::AtenStftOp>(
            binder.getLoc(), stftTy, signal, frameLengthItem, frameStepItem,
            windowLen, window, falseVal, onesided ? trueVal : falseVal,
            trueVal);

        auto permuteStftTy = complexSignalTy.getWithSizesAndDtype(
            ArrayRef<int64_t>(
                {resultShape[0], resultShape[1], resultShape[2]}),
            complexSignalTy.getDtype());
        Value permuteDims = createConstantIntList(binder, rewriter, {0, 2, 1});
        Value permutedStft = rewriter.create<Torch::AtenPermuteOp>(
            binder.getLoc(), permuteStftTy, stft, permuteDims);

        rewriter.replaceOpWithNewOp<Torch::AtenViewAsRealOp>(
            binder.op, resultType, permutedStft);
        return success();
      });
  patterns.onOp(
      "ReverseSequence", 10,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value input, sequenceLens;
        int64_t batchAxis, timeAxis;
        if (binder.tensorOperandAtIndex(input, 0) ||
            binder.tensorOperandAtIndex(sequenceLens, 1) ||
            binder.s64IntegerAttr(batchAxis, "batch_axis", 1) ||
            binder.s64IntegerAttr(timeAxis, "time_axis", 0) ||
            binder.tensorResultType(resultType))
          return failure();

        auto inputTy = cast<Torch::ValueTensorType>(input.getType());
        SmallVector<int64_t> inputShape(inputTy.getSizes());
        auto dtype = resultType.getDtype();

        Value cstZero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(0));
        Value cstOne = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(1));
        Value batchAxisVal = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(batchAxis));
        Value timeAxisVal = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(timeAxis));

        SmallVector<int64_t> sliceShape(inputShape);
        sliceShape[batchAxis] = 1;
        auto sliceType =
            rewriter.getType<Torch::ValueTensorType>(sliceShape, dtype);
        SmallVector<int64_t> flipShape(sliceShape);
        flipShape[timeAxis] = Torch::kUnknownSize;
        auto flipType =
            rewriter.getType<Torch::ValueTensorType>(flipShape, dtype);
        auto scalarTensorType = rewriter.getType<Torch::ValueTensorType>(
            ArrayRef<int64_t>{1}, rewriter.getIntegerType(64, /*signed*/ 1));

        for (int i = 0; i < inputShape[batchAxis]; i++) {
          // slice i iterating on the batch axis
          Value k = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(i));
          Value end =
              rewriter.create<Torch::AtenAddIntOp>(binder.getLoc(), k, cstOne);
          Value sliceBatch = rewriter.create<Torch::AtenSliceTensorOp>(
              binder.getLoc(), sliceType, input, batchAxisVal, k, end, cstOne);

          // get the sequence length and slice the reversing part
          Value kTensor = rewriter.create<Torch::PrimNumToTensorScalarOp>(
              binder.getLoc(), scalarTensorType, k);
          Value sel = rewriter.create<Torch::AtenIndexSelectOp>(
              binder.getLoc(), scalarTensorType, sequenceLens, cstZero,
              kTensor);
          Value len = rewriter.create<Torch::AtenItemOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(), sel);
          Value sliceTime = rewriter.create<Torch::AtenSliceTensorOp>(
              binder.getLoc(), flipType, sliceBatch, timeAxisVal, cstZero, len,
              cstOne);

          // flip the sliced reversing tensor
          Value dims =
              rewriter.create<Torch::PrimListConstructOp>(
                  binder.getLoc(),
                  rewriter.getType<Torch::ListType>(
                      rewriter.getType<Torch::IntType>()),
                  SmallVector<Value>{timeAxisVal});
          Value flip = rewriter.create<Torch::AtenFlipOp>(
              binder.getLoc(), flipType, sliceTime, dims);

          // embed the reversed tensor into the input
          Value embedTime = rewriter.create<Torch::AtenSliceScatterOp>(
              binder.getLoc(), sliceType, sliceBatch, flip, timeAxisVal,
              /*start=*/cstZero, /*end=*/len, /*step=*/cstOne);
          input = rewriter.create<Torch::AtenSliceScatterOp>(
              binder.getLoc(), resultType, input, embedTime, batchAxisVal,
              /*start=*/k, /*end=*/end, /*step=*/cstOne);
        }

        rewriter.replaceOp(binder.op, input);
        return success();
      });
  patterns.onOp(
      "ScatterND", 11,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Torch::ValueTensorType resultType;
        Value data, indices, updates;
        std::string reduction;
        if (binder.tensorOperandAtIndex(data, 0) ||
            binder.tensorOperandAtIndex(indices, 1) ||
            binder.tensorOperandAtIndex(updates, 2) ||
            binder.tensorResultType(resultType))
          return failure();

        // Prior to version 16 of ScatterND, the reduction attribute was not
        // supported. Setting it as "none" for unsupported versions.
        if (binder.customOpNameStringAttr(reduction, "reduction", "none")) {
          reduction = "none";
        }

        // Map the onnx reduction type to the torch reduction type.
        if (reduction == "add") {
          reduction = "sum";
        } else if (reduction == "mul") {
          reduction = "prod";
        } else if (reduction == "max") {
          reduction = "amax";
        } else if (reduction == "min") {
          reduction = "amin";
        } else if (reduction != "none") {
          return rewriter.notifyMatchFailure(
              binder.op, "expects reduction to be one of add, mul, max, min, "
                         "none(default)");
        }

        Location loc = binder.getLoc();
        auto dataTy = dyn_cast<Torch::ValueTensorType>(data.getType());
        auto indicesTy = dyn_cast<Torch::ValueTensorType>(indices.getType());
        auto updatesTy = dyn_cast<Torch::ValueTensorType>(updates.getType());
        if (!dataTy || !indicesTy || !updatesTy || !dataTy.hasSizes() ||
            !indicesTy.hasSizes() || !updatesTy.hasSizes())
          return failure();

        // step 1. Get the shapes and ranks of data, indices and updates.
        // The last dimension of indices is expected to be static.
        ArrayRef<int64_t> dataShape = dataTy.getSizes();
        int64_t dataRank = dataShape.size();
        ArrayRef<int64_t> updatesShape = updatesTy.getSizes();
        int64_t updatesRank = updatesShape.size();
        ArrayRef<int64_t> indicesShape = indicesTy.getSizes();
        int64_t indicesRank = indicesShape.size();
        int64_t indicesLastDim = indicesShape.back();

        // Given a data tensor of rank r >= 1, an indices tensor of rank
        // q >= 1, and an updates tensor of rank q + r - indices_shape[-1] - 1,
        // the output is produced by creating a copy of the input data, and
        // then updating its values at the index positions specified by
        // indices to the values specified by updates. Its output shape is the
        // same as the shape of data.
        // indices_shape[-1] must be static to have deterministic ranks.
        if (dataRank < 1 || indicesRank < 1 || updatesRank < 1)
          return rewriter.notifyMatchFailure(
              binder.op, "expected data, indices and updates rank to be >= 1");
        if (indicesLastDim == Torch::kUnknownSize || indicesLastDim <= 0)
          return rewriter.notifyMatchFailure(
              binder.op, "expected last dimension of indices to be static and "
                         "greater than zero");

        // step 2. Get the dimension list of data.
        SmallVector<Value> dataDims;
        for (int64_t i = 0; i < dataRank; ++i) {
          Value k = rewriter.create<Torch::ConstantIntOp>(loc, i);
          Value dataDim = rewriter.create<Torch::AtenSizeIntOp>(loc, data, k);
          dataDims.push_back(dataDim);
        }

        // step 3. Get the dimension list of indices.
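        // (As a worked illustration with hypothetical shapes, matching the
        // algorithm comment below: for indices of shape (4, 5, 3, 2), the
        // loop collects the leading dims [4, 5, 3] and folds them into
        // indicesFlattenDim = 4 * 5 * 3 = 60.)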
        Value constZero = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(0));
        Value constOne = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(1));
        SmallVector<Value> indicesDimsMinusOne;
        Value indicesFlattenDim = constOne;
        for (int64_t i = 0; i < indicesRank - 1; ++i) {
          Value k = rewriter.create<Torch::ConstantIntOp>(loc, i);
          Value indicesDim =
              rewriter.create<Torch::AtenSizeIntOp>(loc, indices, k);
          indicesDimsMinusOne.push_back(indicesDim);
          indicesFlattenDim = rewriter.create<Torch::AtenMulIntOp>(
              loc, indicesFlattenDim, indicesDim);
        }
        ArrayRef<int64_t> indicesShapeMinusOne = indicesShape.drop_back();

        // Algorithm: We cannot directly perform torch.scatter as it requires
        // the ranks of data (`r`), indices (`q`) and updates to be the same.
        // So we perform collapse and expand operations to match the ranks of
        // data, indices and updates (making sure the semantics of
        // onnx.scatter_nd are preserved), then perform the torch.scatter
        // operation, and later unflatten the scatter result to match the
        // onnx.scatter_nd output. For example, assume indices is of shape
        // (4, 5, 3, 2), data is (4, 10, 11, 7, 4) and updates is
        // (4, 5, 3, 11, 7, 4). Firstly, modify indices to 1-D indexing, as
        // the torch.scatter op supports only single dimensional indexing
        // (this algorithm would have been simpler if we could get a torch op
        // that supports indexing at multiple dimensions simultaneously). The
        // 1-D indexed indices will be of shape (4, 5, 3, 1); now materialize
        // it to the `r-indices_shape[-1]` dimensions of data, i.e. reshape it
        // to the shape (4, 5, 3, 1, 1, 1). The next step is to flatten+expand
        // the indices and flatten the data to (60, 11, 7, 4) and
        // (40, 11, 7, 4) shapes respectively, and then perform the
        // torch.scatter operation. Post the scatter operation, unflatten the
        // first dimension of the result to (4, 10, 11, 7, 4), which is our
        // required result.

        // step 4. Convert indices_shape[-1]-dimensional indexing to 1-D
        // indexing.
        Value sliceDim = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(indicesRank - 1));
        SmallVector<int64_t> indicesSliceShape(indicesShapeMinusOne);
        indicesSliceShape.push_back(1);
        auto indicesSliceTy = rewriter.getType<Torch::ValueTensorType>(
            indicesSliceShape, indicesTy.getOptionalDtype());

        Value start = constZero;
        Value updatedIndices;
        for (int64_t i = 0; i < indicesLastDim; ++i) {
          Value end = rewriter.create<Torch::ConstantIntOp>(
              loc, rewriter.getI64IntegerAttr(i + 1));
          Value indicesSlice = rewriter.create<Torch::AtenSliceTensorOp>(
              loc, indicesSliceTy, indices, sliceDim, start, end,
              /*step=*/constOne);
          start = end;
          // Apply bounds checking on the indices slice.
          auto boolTy = rewriter.getType<Torch::ValueTensorType>(
              indicesSliceShape, rewriter.getI1Type());
          Value lt = rewriter.create<Torch::AtenLtScalarOp>(
              loc, boolTy, indicesSlice, constZero);
          Value add = rewriter.create<Torch::AtenAddScalarOp>(
              loc, indicesSliceTy, indicesSlice, dataDims[i],
              /*alpha=*/constOne);
          indicesSlice = rewriter.create<Torch::AtenWhereSelfOp>(
              loc, indicesSliceTy, lt, add, indicesSlice);

          if (i == 0) {
            updatedIndices = indicesSlice;
            continue;
          }
          updatedIndices = rewriter.create<Torch::AtenAddTensorOp>(
              loc, indicesSliceTy, indicesSlice, updatedIndices, dataDims[i]);
        }

        // step 5. Compute all the required result types here.
        SmallVector<int64_t> reshapeIndicesShape(indicesShapeMinusOne);
        SmallVector<Value> reshapeIndicesDims(indicesDimsMinusOne);
        // Determine the collapsed dim size of indices (index_shape[-1] is not
        // part of the collapse as we already removed it by 1-D indexing).
        SmallVector<int64_t> flattenIndicesShape;
        int64_t indicesCt = 1;
        for (int64_t i = 0; i < indicesRank - 1; ++i) {
          if (indicesShape[i] == Torch::kUnknownSize) {
            indicesCt = Torch::kUnknownSize;
            break;
          }
          indicesCt *= indicesShape[i];
        }
        flattenIndicesShape.push_back(indicesCt);

        // Compute the shape of the expand op.
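        // (Continuing the example: flattenIndicesShape starts as [60]; after
        // the appends below it becomes [60, 1, 1, 1], expandIndicesShape
        // becomes [60, 11, 7, 4], and flattenDataShape becomes
        // [40, 11, 7, 4].)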
        SmallVector<Value> expandIndicesDims;
        expandIndicesDims.push_back(indicesFlattenDim);
        SmallVector<int64_t> expandIndicesShape;
        expandIndicesShape.push_back(indicesCt);
        // Determine the collapsed dim size of data.
        SmallVector<int64_t> flattenDataShape;
        int64_t dataCt = 1;
        for (int64_t i = 0; i < indicesLastDim; ++i) {
          if (dataShape[i] == Torch::kUnknownSize) {
            dataCt = Torch::kUnknownSize;
            break;
          }
          dataCt *= dataShape[i];
        }
        flattenDataShape.push_back(dataCt);
        // Determine the collapsed dim size of updates.
        SmallVector<int64_t> flattenUpdatesShape;
        int64_t updatesCt = 1;
        for (int64_t i = 0; i < indicesRank - 1; ++i) {
          if (updatesShape[i] == Torch::kUnknownSize) {
            updatesCt = Torch::kUnknownSize;
            break;
          }
          updatesCt *= updatesShape[i];
        }
        flattenUpdatesShape.push_back(updatesCt);
        flattenUpdatesShape.insert(flattenUpdatesShape.end(),
                                   updatesShape.begin() + indicesRank - 1,
                                   updatesShape.end());

        // Append `r-indices_shape[-1]` unit or data dims appropriately to all
        // result types.
        for (int64_t i = indicesLastDim; i < dataRank; ++i) {
          reshapeIndicesShape.push_back(1);
          flattenIndicesShape.push_back(1);
          flattenDataShape.push_back(dataShape[i]);
          expandIndicesShape.push_back(dataShape[i]);
          reshapeIndicesDims.push_back(constOne);
          expandIndicesDims.push_back(dataDims[i]);
        }

        // step 6. Reshape the 1-D indexed indices to match the rank of the
        // flattened data by inserting unit dimensions.
        auto intListTy = rewriter.getType<Torch::ListType>(
            rewriter.getType<Torch::IntType>());
        Value reshapeIndicesSizeList =
            rewriter.create<Torch::PrimListConstructOp>(loc, intListTy,
                                                        reshapeIndicesDims);
        auto reshapeIndicesTy = rewriter.getType<Torch::ValueTensorType>(
            reshapeIndicesShape, indicesTy.getOptionalDtype());
        Value reshapedIndices = rewriter.create<Torch::AtenViewOp>(
            loc, reshapeIndicesTy, updatedIndices, reshapeIndicesSizeList);

        // step 7. Flatten the `q-1` leading dimensions of the indices and
        // updates.
        auto flattenIndicesTy = rewriter.getType<Torch::ValueTensorType>(
            flattenIndicesShape, indicesTy.getOptionalDtype());
        auto flattenUpdatesTy = rewriter.getType<Torch::ValueTensorType>(
            flattenUpdatesShape, updatesTy.getOptionalDtype());
        Value flattenedIndices = reshapedIndices;
        Value flattenedUpdates = updates;
        if (indicesRank == 1) {
          flattenedIndices = rewriter.create<Torch::AtenUnsqueezeOp>(
              loc, flattenIndicesTy, reshapedIndices, constZero);
          flattenedUpdates = rewriter.create<Torch::AtenUnsqueezeOp>(
              loc, flattenUpdatesTy, updates, constZero);
        } else if (indicesRank > 1) {
          Value endDim = rewriter.create<Torch::ConstantIntOp>(
              loc, rewriter.getI64IntegerAttr(indicesRank - 2));
          flattenedIndices = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
              loc, flattenIndicesTy, reshapedIndices, constZero, endDim);
          flattenedUpdates = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
              loc, flattenUpdatesTy, updates, constZero, endDim);
        }

        // step 8. Expand the `r-indices_shape[-1]` dims of the flattened
        // indices.
        auto expandIndicesTy = rewriter.getType<Torch::ValueTensorType>(
            expandIndicesShape, indicesTy.getOptionalDtype());
        Value expandIndicesSizeList =
            rewriter.create<Torch::PrimListConstructOp>(loc, intListTy,
                                                        expandIndicesDims);
        Value constFalse = rewriter.create<Torch::ConstantBoolOp>(
            loc, rewriter.getType<Torch::BoolType>(),
            rewriter.getBoolAttr(false));
        Value expandedIndices = rewriter.create<Torch::AtenExpandOp>(
            loc, expandIndicesTy, flattenedIndices, expandIndicesSizeList,
            /*implicit=*/constFalse);

        // step 9. Flatten the indices_shape[-1] leading dimensions of data.
        auto flattenDataTy = rewriter.getType<Torch::ValueTensorType>(
            flattenDataShape, dataTy.getOptionalDtype());
        Value endDim = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(indicesLastDim - 1));
        Value flattenedData = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
            loc, flattenDataTy, data, constZero, endDim);

        // step 10. Now we have flattenedData, expandedIndices and
        // flattenedUpdates of the same rank to perform the scatter operation.
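        // (In the example: flattenedData (40, 11, 7, 4), expandedIndices
        // (60, 11, 7, 4) and flattenedUpdates (60, 11, 7, 4) now all have
        // rank 4, so torch.scatter along dim 0 is well-formed.)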
        auto scatterTy = rewriter.getType<Torch::ValueTensorType>(
            flattenDataShape, dataTy.getOptionalDtype());
        Value scatter;
        if (reduction == "none") {
          scatter = rewriter.create<Torch::AtenScatterSrcOp>(
              loc, scatterTy, flattenedData, /*axis=*/constZero,
              expandedIndices, flattenedUpdates);
        } else {
          Value cstReduction =
              rewriter.create<Torch::ConstantStrOp>(loc, reduction);
          Value constTrue = rewriter.create<Torch::ConstantBoolOp>(
              loc, rewriter.getType<Torch::BoolType>(),
              rewriter.getBoolAttr(true));
          scatter = rewriter.create<Torch::AtenScatterReduceTwoOp>(
              loc, scatterTy, flattenedData, /*axis=*/constZero,
              expandedIndices, flattenedUpdates, cstReduction,
              /*include_self=*/constTrue);
        }

        // step 11. Unflatten the collapsed data dims of the scatter result.
        if (indicesLastDim == 1) {
          rewriter.replaceOp(binder.op, scatter);
          return success();
        }
        Value unflattenSizeList = rewriter.create<Torch::PrimListConstructOp>(
            loc, intListTy, dataDims);
        rewriter.replaceOpWithNewOp<Torch::AtenUnflattenIntOp>(
            binder.op, resultType, scatter, constZero, unflattenSizeList);
        return success();
      });
  // split to sequence
  // Arguments:
  // - input: the tensor to split
  // - split (optional): the length of each output
  // Attributes:
  // - axis: the axis along which to split the input
  // - keepdims: whether to keep the split dimension. Ignored when 'split'
  //   is specified.
  // Outputs:
  // - outputs: sequence of tensors
  //
  patterns.onOp(
      "SplitToSequence", 11,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Value self;
        Value split;
        int64_t axis;
        int64_t keepdims;
        Torch::ListType resultType;

        if (binder.op->getNumOperands() == 1)
          return rewriter.notifyMatchFailure(
              binder.op, "The number of operands should be two; the keepdims "
                         "attribute is not yet implemented");

        if (binder.tensorOperandAtIndex(self, 0) ||
            binder.tensorListResultType(resultType) ||
            binder.s64IntegerAttr(keepdims, "keepdims", 1) ||
            binder.tensorOperandAtIndex(split, 1) ||
            binder.s64IntegerAttr(axis, "axis", 0))
          return rewriter.notifyMatchFailure(
              binder.op,
              "Not converting to AtenSplitToSequenceOp due to inputs");

        Value axisValue = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getI64IntegerAttr(axis));
        auto splitTy = dyn_cast<Torch::ValueTensorType>(split.getType());
        if (!splitTy || !splitTy.hasSizes())
          return failure();
        auto splitSizes = splitTy.getSizes();
        unsigned splitDim = splitTy.getSizes().size();

        if (splitDim > 1)
          return rewriter.notifyMatchFailure(
              binder.op, "Split should be a scalar or a 1-D tensor");

        if (splitDim == 1) {
          if (splitSizes[0] == Torch::kUnknownSize) {
            return rewriter.notifyMatchFailure(
                binder.op, "Dynamic shapes for split are not yet supported");
          } else if (splitSizes[0] <= 1) {
            // Handle a 1-D split tensor holding 0 or 1 elements.
            Value splitInt = rewriter.create<Torch::AtenItemOp>(
                binder.getLoc(), rewriter.getType<Torch::IntType>(), split);
            rewriter.replaceOpWithNewOp<Torch::AtenSplitTensorOp>(
                binder.op, resultType, self, splitInt, axisValue);
            return success();
          } else {
            // Handle multiple elements in the split tensor.
            Value shapeList =
                createConstantIntList(binder, rewriter, splitSizes);
            rewriter.replaceOpWithNewOp<Torch::AtenSplitWithSizesOp>(
                binder.op, resultType, self, shapeList, axisValue);
            return success();
          }
        } else if (splitDim == 0) {
          // Handle a 0-D split tensor.
          Value splitInt = rewriter.create<Torch::AtenItemOp>(
              binder.getLoc(), rewriter.getType<Torch::IntType>(), split);
          rewriter.replaceOpWithNewOp<Torch::AtenSplitTensorOp>(
              binder.op, resultType, self, splitInt, axisValue);
          return success();
        } else {
          return rewriter.notifyMatchFailure(
              binder.op, "unsupported kind of split input");
        }
      });
  patterns.onOp(
      "Unique", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Value input;
        int64_t axis, sorted;
        SmallVector<Type> resultTypes;

        if (binder.tensorOperand(input) ||
            binder.s64IntegerAttr(sorted, "sorted", 1) ||
            binder.tensorResultTypes(resultTypes))
          return failure();

        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), 0);
        auto inputTy = cast<Torch::ValueTensorType>(input.getType());
        if (!inputTy.hasSizes()) {
          return rewriter.notifyMatchFailure(
              binder.op, "Expected input type to have sizes");
        }
        auto inputShape = inputTy.getSizes();
        int64_t inputDim = static_cast<int64_t>(inputShape.size());

        Value axisVal;
        SmallVector<int64_t> outputTensorSizes(inputDim);
        bool axisWasNone;
        if (!binder.optionalS64IntegerAttr(axis, "axis")) {
          if (axis < -1 * inputDim || axis > inputDim - 1)
            return rewriter.notifyMatchFailure(binder.op,
                                               "invalid value for axis");
          axisVal = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(axis));
          axisWasNone = false;
        } else {
          axisVal = zero;
          axisWasNone = true;
        }

        Value sortedVal = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(sorted));
        Value trueVal =
            rewriter.create<Torch::ConstantBoolOp>(binder.getLoc(), true);

        // The shape of inverse_indices is the same as the input shape, but
        // resultTypes[2] must be used to avoid a live value after conversion.
        Torch::ValueTensorType outputTy;
        outputTy = cast<Torch::ValueTensorType>(resultTypes[0]);
        Torch::ValueTensorType countsTy =
            cast<Torch::ValueTensorType>(resultTypes[3]);
        Torch::ValueTensorType inverseTy =
            cast<Torch::ValueTensorType>(resultTypes[2]);

        if (axisWasNone) {
          int64_t inputNumel = 1;
          for (auto elem : inputShape) {
            if (elem == Torch::kUnknownSize) {
              return rewriter.notifyMatchFailure(
                  binder.op,
                  "Expected all sizes in input shape to be statically known");
            }
            inputNumel *= elem;
          }
          auto flattenResultTy = rewriter.getType<Torch::ValueTensorType>(
              ArrayRef<int64_t>({inputNumel}), inputTy.getDtype());
          Value negativeOne =
              rewriter.create<Torch::ConstantIntOp>(binder.getLoc(), -1);
          input = rewriter.create<Torch::AtenFlattenUsingIntsOp>(
              binder.getLoc(), flattenResultTy, input, zero, negativeOne);
        }

        Torch::AtenUniqueDimOp intermResults =
            rewriter.create<Torch::AtenUniqueDimOp>(
                binder.getLoc(), outputTy, inverseTy, countsTy, input, axisVal,
                sortedVal, trueVal, trueVal);
        SmallVector<Value> uniqueResults(intermResults->getResults().begin(),
                                         intermResults->getResults().end());

        // Calculate the indices where each of the unique elements first
        // appeared in the original input tensor. The counts tensor and the
        // indices tensor have the same dtype, int64, so reuse that here.
        auto arangeResultType = rewriter.getType<Torch::ValueTensorType>(
            ArrayRef<int64_t>({inputShape[0]}), countsTy.getOptionalDtype());

        Value inputDimZero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(inputShape[0]));
        Value int64Type = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(4));
        Value noneVal = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());

        Value perm = rewriter.create<Torch::AtenArangeOp>(
            binder.getLoc(), arangeResultType, inputDimZero,
            /*dtype=*/int64Type,
            /*layout=*/noneVal, /*device=*/noneVal, /*pin_memory=*/noneVal);

        // Inverse has the same shape as the input, but not the same dtype.
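        // (A sketch of the flip+scatter trick below, assuming scatter writes
        // duplicate indices in iteration order: for input [2, 1, 2], the
        // unique output is [1, 2] with inverse [1, 0, 1] and
        // perm = arange(3) = [0, 1, 2]. Flipping both and scattering perm by
        // inverse makes the earliest original position win, yielding
        // first-occurrence indices [1, 0].)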
        Value flipDims = createConstantIntList(binder, rewriter, {0});
        Value inverse = rewriter.create<Torch::AtenFlipOp>(
            binder.getLoc(),
            inputTy.getWithSizesAndDtype(inputShape, countsTy.getDtype()),
            uniqueResults[1], flipDims);
        perm = rewriter.create<Torch::AtenFlipOp>(
            binder.getLoc(), cast<Torch::ValueTensorType>(perm.getType()),
            perm, flipDims);

        auto newInverseTy = rewriter.getType<Torch::ValueTensorType>(
            ArrayRef<int64_t>({outputTy.getSizes()[0]}), countsTy.getDtype());
        Value newInverseSize =
            createConstantIntList(binder, rewriter, {outputTy.getSizes()[0]});
        Value newInverse = rewriter.create<Torch::AtenNewEmptyOp>(
            binder.getLoc(), newInverseTy, inverse, newInverseSize,
            /*dtype=*/int64Type, /*layout=*/noneVal, /*device=*/noneVal,
            /*pin_memory=*/noneVal);

        Value firstOccurIndices = rewriter.create<Torch::AtenScatterSrcOp>(
            binder.getLoc(), resultTypes[1], newInverse, zero, inverse, perm);

        rewriter.replaceOp(binder.op, {uniqueResults[0], firstOccurIndices,
                                       uniqueResults[1], uniqueResults[2]});
        return success();
      });
  patterns.onOp(
      "TfIdfVectorizer", 9,
      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        llvm::SmallVector<int64_t> ngram_counts;
        llvm::SmallVector<int64_t> ngram_indexes;
        llvm::SmallVector<int64_t> pool_int64s;
        std::string mode;
        int64_t min_gram_length;
        int64_t max_gram_length;
        int64_t max_skip_count;
        Value input;
        Torch::ValueTensorType resultType;

        if (binder.s64IntegerArrayAttr(ngram_counts, "ngram_counts", {}) ||
            binder.s64IntegerArrayAttr(ngram_indexes, "ngram_indexes", {}) ||
            binder.s64IntegerArrayAttr(pool_int64s, "pool_int64s", {}) ||
            binder.customOpNameStringAttr(mode, "mode", "") ||
            binder.s64IntegerAttr(min_gram_length, "min_gram_length", 0) ||
            binder.s64IntegerAttr(max_gram_length, "max_gram_length", 0) ||
            binder.s64IntegerAttr(max_skip_count, "max_skip_count", 0) ||
            binder.tensorOperand(input) || binder.tensorResultType(resultType))
          return failure();

        if (mode != "TF")
          return rewriter.notifyMatchFailure(binder.op,
                                             "TF mode supported only");
        if (pool_int64s.empty())
          return rewriter.notifyMatchFailure(
              binder.op, "pool_int64s empty, only integers supported");

        auto inputType = dyn_cast<Torch::ValueTensorType>(input.getType());
        auto inputSizes = inputType.getSizes();
        SmallVector<int64_t> inputShape(inputSizes);
        bool is_2d = inputShape.size() > 1;
        if (is_2d && inputShape[0] == ShapedType::kDynamic)
          return rewriter.notifyMatchFailure(
              binder.op, "input batch dimension cannot be dynamic");
        int batch_size =
            is_2d ? inputShape[0] : 1;

        Value none = rewriter.create<Torch::ConstantNoneOp>(binder.getLoc());
        Value zero = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getType<Torch::IntType>(),
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
        Value one = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(1));
        Value cstFalse = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(false));

        auto intType = rewriter.getType<Torch::IntType>();
        Value loopConditionTrue = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(true));
        Type loopIndexType = intType;

        // Create a zero tensor for the output.
        SmallVector<int64_t> resultShape(resultType.getSizes());
        int64_t rank = resultShape.size();
        SmallVector<Value> zerosShapeValues;
        for (int j = 0; j < rank; j++) {
          Value dimSize = rewriter.create<Torch::ConstantIntOp>(
              binder.getLoc(), rewriter.getI64IntegerAttr(resultShape[j]));
          zerosShapeValues.push_back(dimSize);
        }
        Value zerosShapeList = rewriter.create<Torch::PrimListConstructOp>(
            binder.getLoc(),
            rewriter.getType<Torch::ListType>(
                rewriter.getType<Torch::IntType>()),
            zerosShapeValues);
        Value output = rewriter.create<Torch::AtenZerosOp>(
            binder.getLoc(), resultType, zerosShapeList, none, none, none,
            none);

        Value batchSize = rewriter.create<Torch::ConstantIntOp>(
            binder.getLoc(), rewriter.getI64IntegerAttr(batch_size));
        auto batchLoop = rewriter.create<Torch::PrimLoopOp>(
            binder.getLoc(), TypeRange({output.getType()}), batchSize,
            loopConditionTrue, ValueRange({output}));
        {
          PatternRewriter::InsertionGuard guard(rewriter);
          Block *batchLoopBody = rewriter.createBlock(
              &batchLoop.getRegion(), batchLoop.getRegion().begin(),
              TypeRange({loopIndexType, output.getType()}),
              {binder.getLoc(), binder.getLoc()});
          Value batchValue = batchLoopBody->getArgument(0);
          Value output = batchLoopBody->getArgument(1);
          Value outputForBatch = output;
          Value inputSequence = input;
          if (is_2d) {
            // get the input sequence from the input
            // (ex: [[0,1],[2,3]] -> [[0,1]] -> [0,1])
            SmallVector<int64_t> inputSequenceShape;
            inputSequenceShape.push_back(1);
            inputSequenceShape.push_back(inputShape[1]);
            auto inputSequenceType = rewriter.getType<Torch::ValueTensorType>(
                inputSequenceShape, inputType.getOptionalDtype());
            Value batchPlusOne = rewriter.create<Torch::AtenAddIntOp>(
                binder.getLoc(), batchValue, one);
            inputSequence = rewriter.create<Torch::AtenSliceTensorOp>(
                binder.getLoc(), inputSequenceType, input, /*dim=*/zero,
                batchValue, batchPlusOne, one);
            inputSequence = rewriter.create<Torch::AtenSqueezeDimOp>(
                binder.getLoc(),
                Torch::ValueTensorType::get(binder.op->getContext(),
                                            ArrayRef<int64_t>{inputShape[1]},
                                            inputType.getOptionalDtype()),
                inputSequence, zero);

            SmallVector<int64_t> outputForBatchShape;
            outputForBatchShape.push_back(1);
            outputForBatchShape.push_back(resultShape[1]);
            auto outputForBatchType = rewriter.getType<Torch::ValueTensorType>(
                outputForBatchShape, resultType.getOptionalDtype());
            outputForBatch = rewriter.create<Torch::AtenSliceTensorOp>(
                binder.getLoc(), outputForBatchType, output, /*dim=*/zero,
                batchValue, batchPlusOne, one);
            outputForBatch = rewriter.create<Torch::AtenSqueezeDimOp>(
                binder.getLoc(),
                Torch::ValueTensorType::get(binder.op->getContext(),
                                            ArrayRef<int64_t>{resultShape[1]},
                                            resultType.getOptionalDtype()),
                outputForBatch, zero);
          }
          // ngram_counts[j] records the starting position of the ngrams of
          // length j+1 within pool_int64s. The loop below iterates through
          // the different n-gram sizes.
          // ngram_i keeps track of which ngram we are looking at in the pool.
          // The frequency of this ngram will be stored in the output tensor
          // at the position ngram_indexes[ngram_i].
          int ngram_i = 0;
          for (int j = 0; j < (int)ngram_counts.size(); j++) {
            int ngram_length = j + 1;
            int start_idx = ngram_counts[j];
            int end_idx = (j + 1) < (int)ngram_counts.size()
                              ? ngram_counts[j + 1]
                              : pool_int64s.size();
            if (j + 1 < min_gram_length || j + 1 > max_gram_length) {
              // Progress the ngram counter for the skipped (j+1)-grams.
              ngram_i += (end_idx - start_idx) / ngram_length;
              continue;
            }

            Value ngramLength = rewriter.create<Torch::ConstantIntOp>(
                binder.getLoc(), rewriter.getI64IntegerAttr(ngram_length));
            for (int start = start_idx; start < end_idx;
                 start += ngram_length, ngram_i++) {
              Value count = rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(), rewriter.getI64IntegerAttr(0));
              // For 1-grams there is no skipping (skip = gap between
              // consecutive values in the n-gram pulled from the input
              // sequence), so we default to skip_count_bound = 1 in that
              // case to avoid repeating the same count multiple times.
              int skip_count_bound =
                  (ngram_length == 1) ? 1 : (max_skip_count + 1);
              Value skipCountBound = rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(), intType,
                  rewriter.getI64IntegerAttr(skip_count_bound));
              // Given an n-gram to search for, and the input sequence to
              // search in, count how many times that n-gram appears in the
              // input for each skip between 0 and max_skip_count (inclusive).
              auto skipLoop = rewriter.create<Torch::PrimLoopOp>(
                  binder.getLoc(), TypeRange({count.getType()}),
                  skipCountBound, loopConditionTrue, ValueRange({count}));
              {
                PatternRewriter::InsertionGuard guard(rewriter);
                Block *skipLoopBody = rewriter.createBlock(
                    &skipLoop.getRegion(), skipLoop.getRegion().begin(),
                    TypeRange({loopIndexType, count.getType()}),
                    {binder.getLoc(), binder.getLoc()});
                Value skipCount = skipLoopBody->getArgument(0);
                Value skipCountPlusOne = rewriter.create<Torch::AtenAddIntOp>(
                    binder.getLoc(), skipCount, one);
                count = skipLoopBody->getArgument(1);

                // max_start_index =
                //   inputSizes.back() - ((ngram_length - 1) * (skip_count + 1));
                // the index one higher than the last possible start index
                // without the input ngram going out of bounds
                Value seqLen = rewriter.create<Torch::ConstantIntOp>(
                    binder.getLoc(), intType,
                    rewriter.getI64IntegerAttr(inputSizes.back()));
                Value ngramLengthMinusOne =
                    rewriter.create<Torch::AtenSubIntOp>(binder.getLoc(),
                                                         ngramLength, one);
                Value ngramSkipLength = rewriter.create<Torch::AtenMulIntOp>(
                    binder.getLoc(), ngramLengthMinusOne, skipCountPlusOne);
                Value maxStartIndex = rewriter.create<Torch::AtenSubIntOp>(
                    binder.getLoc(), seqLen, ngramSkipLength);
                // This loop will extract each n-gram with the given
                // skip_count from the input sequence, starting from the start
                // input index, and increment the count if the n-gram matches
                // the one obtained from pool_int64s.
                auto countLoop = rewriter.create<Torch::PrimLoopOp>(
                    binder.getLoc(), TypeRange({count.getType()}),
                    maxStartIndex, loopConditionTrue, ValueRange({count}));
                {
                  PatternRewriter::InsertionGuard guard(rewriter);
                  Block *countLoopBody = rewriter.createBlock(
                      &countLoop.getRegion(), countLoop.getRegion().begin(),
                      TypeRange({loopIndexType, count.getType()}),
                      {binder.getLoc(), binder.getLoc()});
                  Value startInputIdx = countLoopBody->getArgument(0);
                  count = countLoopBody->getArgument(1);

                  // extract the input ngram and compare it to the pool ngram
                  Torch::BaseTensorType inputSequenceType =
                      cast<Torch::BaseTensorType>(inputSequence.getType());
                  SmallVector<int64_t> selectSizes;
                  selectSizes.push_back(1);
                  Type selectResultType =
                      inputSequenceType.getWithSizesAndDtype(
                          llvm::ArrayRef(selectSizes),
                          inputSequenceType.getOptionalDtype());
                  Value foundNgram = rewriter.create<Torch::ConstantIntOp>(
                      binder.getLoc(), rewriter.getI64IntegerAttr(1));
                  for (int i = 0; i < ngram_length; i++) {
                    Value selectIndex = rewriter.create<Torch::ConstantIntOp>(
                        binder.getLoc(), rewriter.getType<Torch::IntType>(),
                        rewriter.getIntegerAttr(rewriter.getIntegerType(64),
                                                i));
                    selectIndex = rewriter.create<Torch::AtenMulIntOp>(
                        binder.getLoc(), selectIndex, skipCountPlusOne);
                    selectIndex = rewriter.create<Torch::AtenAddIntOp>(
                        binder.getLoc(), selectIndex,
                        startInputIdx);
                    Value inputExtract =
                        rewriter.create<Torch::AtenSelectIntOp>(
                            binder.getLoc(), selectResultType, inputSequence,
                            zero, selectIndex);
                    Value inputNgram_i = rewriter.create<Torch::AtenItemOp>(
                        binder.getLoc(), rewriter.getType<Torch::IntType>(),
                        inputExtract);
                    Value poolNgram_i = rewriter.create<Torch::ConstantIntOp>(
                        binder.getLoc(),
                        rewriter.getI64IntegerAttr(pool_int64s[start + i]));
                    Value isEqual = rewriter.create<Torch::AtenEqIntOp>(
                        binder.getLoc(), inputNgram_i, poolNgram_i);
                    isEqual = rewriter.create<Torch::AtenIntBoolOp>(
                        binder.getLoc(), isEqual);
                    foundNgram = rewriter.create<Torch::AtenMulIntOp>(
                        binder.getLoc(), isEqual, foundNgram);
                  }

                  count = rewriter.create<Torch::AtenAddIntOp>(
                      binder.getLoc(), count, foundNgram);
                  rewriter.create<Torch::PrimLoopConditionOp>(
                      binder.getLoc(), loopConditionTrue, ValueRange({count}));
                }
                count = countLoop.getResult(0);
                rewriter.create<Torch::PrimLoopConditionOp>(
                    binder.getLoc(), loopConditionTrue, ValueRange({count}));
              }
              count = skipLoop.getResult(0);

              // insert count "tf" into output
              Value countFloat = rewriter.create<Torch::AtenFloatScalarOp>(
                  binder.getLoc(), count);
              Value dataList = rewriter.create<Torch::PrimListConstructOp>(
                  binder.getLoc(),
                  rewriter.getType<Torch::ListType>(
                      rewriter.getType<Torch::FloatType>()),
                  SmallVector<Value>{countFloat});
              Value cstDtype = rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(),
                  rewriter.getI64IntegerAttr(
                      (int)torch_upstream::ScalarType::Float));
              SmallVector<int64_t> countShape{1};
              auto countType = rewriter.getType<Torch::ValueTensorType>(
                  countShape, resultType.getOptionalDtype());
              Value countTensor = rewriter.create<Torch::AtenTensorOp>(
                  binder.getLoc(), countType, dataList, /*dtype=*/cstDtype,
                  /*device=*/none, /*requires_grad=*/cstFalse);
              Value insertStart = rewriter.create<Torch::ConstantIntOp>(
                  binder.getLoc(),
                  rewriter.getI64IntegerAttr(ngram_indexes[ngram_i]));
              Value insertEnd = rewriter.create<Torch::AtenAddIntOp>(
                  binder.getLoc(), insertStart, one);
              outputForBatch = rewriter.create<Torch::AtenSliceScatterOp>(
                  binder.getLoc(), outputForBatch.getType(), outputForBatch,
                  countTensor, /*dim=*/zero, insertStart, insertEnd,
                  /*step=*/one);
            } // start
          }
          if (is_2d) {
            Value batchPlusOne = rewriter.create<Torch::AtenAddIntOp>(
                binder.getLoc(), batchValue, one);
            outputForBatch = rewriter.create<Torch::AtenUnsqueezeOp>(
                binder.getLoc(),
                rewriter.getType<Torch::ValueTensorType>(
                    llvm::SmallVector<int64_t>{1, resultShape[1]},
                    resultType.getDtype()),
                outputForBatch, zero);
            output = rewriter.create<Torch::AtenSliceScatterOp>(
                binder.getLoc(), resultType, output, outputForBatch,
                /*dim=*/zero, batchValue, batchPlusOne,
                /*step=*/one);
          } else {
            output = outputForBatch;
          }
          rewriter.create<Torch::PrimLoopConditionOp>(
              binder.getLoc(), loopConditionTrue, ValueRange({output}));
        }
        output = batchLoop.getResult(0);
        rewriter.replaceOp(binder.op, output);
        return success();
      });
  patterns.onOp(
      "Scan", 11, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
        Location loc = binder.getLoc();
        SmallVector<Value> operands;
        int64_t numScanInputs;
        if (binder.tensorOperandsList(operands) || operands.size() == 0 ||
            binder.s64IntegerAttr(numScanInputs, "num_scan_inputs")) {
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed to get required inputs");
        }
        SmallVector<Type> resultTypes;
        if (binder.tensorResultTypes(resultTypes)) {
          return rewriter.notifyMatchFailure(binder.op,
                                             "result type bind failure");
        }
        Region *loopBodyIn;
        if (binder.getRegionAtIndex(loopBodyIn, 0)) {
          return rewriter.notifyMatchFailure(binder.op,
                                             "Failed getting LoopBody Region");
        }

        int64_t numInits = operands.size() - numScanInputs;
        SmallVector<Value> initVals(operands.begin(),
                                    operands.begin() + numInits);
        SmallVector<Value> scanInputs(operands.begin() + numInits,
                                      operands.end());
        if (scanInputs.size() < 1) {
          return rewriter.notifyMatchFailure(
              binder.op, "Expects at least one scan input");
        }

        Value constZero = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(0));
        Value constOne = rewriter.create<Torch::ConstantIntOp>(
            loc, rewriter.getI64IntegerAttr(1));
        SmallVector<Type> scanOutTypes;
        for (unsigned i = numInits; i < resultTypes.size(); i++) {
          auto scanOutTy = cast<Torch::ValueTensorType>(resultTypes[i]);
          // TODO: Handle dynamic result types.
          if (!scanOutTy.hasSizes() || !scanOutTy.areAllSizesKnown()) {
            return rewriter.notifyMatchFailure(
                binder.op, "Expects result type to be static");
          }
          Value sizeList =
              createConstantIntList(binder, rewriter, scanOutTy.getSizes());
          initVals.push_back(Torch::createInitTensor(rewriter, loc, scanOutTy,
                                                     constZero, sizeList));
          scanOutTypes.push_back(resultTypes[i]);
        }
        // Create the torch.prim.Loop op.
        Value maxTripCount = rewriter.create<Torch::AtenSizeIntOp>(
            loc, scanInputs[0], constZero);
        auto constBoolTrue = rewriter.create<Torch::ConstantBoolOp>(
            binder.getLoc(), rewriter.getBoolAttr(true));
        auto primLoop = rewriter.create<Torch::PrimLoopOp>(
            loc, resultTypes, maxTripCount, constBoolTrue, initVals);
        rewriter.cloneRegionBefore(*loopBodyIn, primLoop.getRegion(),
                                   primLoop.getRegion().begin());

        // Insert an index var as a torch.int argument in the loop body, as
        // the primLoopOp loopBody expects torch.int as its first argument.
        primLoop.getRegion().insertArgument(
            0u, rewriter.getType<Torch::IntType>(), loc);
        auto loopInd = primLoop.getRegion().getArgument(0);

        // The block arguments of onnx.scan need to be replaced with slices
        // of the scan inputs.
        rewriter.setInsertionPointToStart(&primLoop.getRegion().front());
        for (unsigned i = 0; i < numScanInputs; i++) {
          auto loopBlockArg =
              primLoop.getRegion().getArgument(numInits + 1 + i);
          Value extract = rewriter.create<Torch::AtenSelectIntOp>(
              loc, loopBlockArg.getType(), scanInputs[i], constZero, loopInd);
          loopBlockArg.replaceAllUsesWith(extract);
        }
        primLoop.getRegion().front().eraseArguments(numInits + 1,
                                                    /*count=*/numScanInputs);

        // Collect the output slices to form the scan outputs and replace the
        // terminator.
        SmallVector<Location> locs(scanOutTypes.size(), loc);
        primLoop.getRegion().front().addArguments(scanOutTypes, locs);

        PatternRewriter::InsertionGuard guard(rewriter);
        Operation *terminator = primLoop.getRegion().front().getTerminator();
        auto terminatorOperands = terminator->getOperands();
        SmallVector<Value> resTerminatorOperands(
            terminatorOperands.begin(), terminatorOperands.begin() + numInits);
        SmallVector<Value> scanOutSlices(terminatorOperands.begin() + numInits,
                                         terminatorOperands.end());
        rewriter.setInsertionPoint(terminator);
        for (unsigned i = 0; i < scanOutSlices.size(); i++) {
          Value self = primLoop.getRegion().getArgument(numInits + 1 + i);
          FailureOr<Value> src = Torch::unsqueezeTensor(
              rewriter, binder.op, scanOutSlices[i], constZero);
          if (failed(src))
            return failure();
          Value scanOut = rewriter.create<Torch::AtenSliceScatterOp>(
              loc, scanOutTypes[i], self, src.value(), constZero,
              /*start=*/loopInd, /*end=*/loopInd, constOne);
          resTerminatorOperands.push_back(scanOut);
        }

        Value terminatorCond = constBoolTrue;
        rewriter.replaceOpWithNewOp<Torch::PrimLoopConditionOp>(
            terminator, terminatorCond, resTerminatorOperands);
        rewriter.replaceOp(binder.op, primLoop);
        return success();
      });
}