//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // Also available under a BSD-style license. See LICENSE. // //===----------------------------------------------------------------------===// #include "mlir/IR/BuiltinTypes.h" #include "torch-mlir/Conversion/TorchToLinalg/TorchToLinalg.h" #include "PopulatePatterns.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/Matchers.h" #include "torch-mlir/Conversion/TorchToLinalg/Utils.h" #include "torch-mlir/Conversion/Utils/Utils.h" #include "torch-mlir/Dialect/Torch/IR/TorchOps.h" #include "torch-mlir/Dialect/Torch/Utils/TorchUpstream.h" #include "torch-mlir/Dialect/Torch/Utils/Utils.h" #include "llvm/ADT/APSInt.h" #include #include using namespace mlir; using namespace mlir::torch; using namespace mlir::torch::Torch; // Check if a ranked-tensor has the specified element type. 
// NOTE(review): in this listing the whitespace is collapsed and every
// angle-bracket argument list appears to be missing (`template static ...`,
// `cast(...)`, `isa(...)`, `dyn_cast(...)`, `b.create(loc, ...)`). The code
// below keeps the tokens exactly as listed and only restores line structure;
// the comments describe what is still visible and should be re-checked against
// the intact file before relying on them.

// Returns the result of the `isa` check applied to the element type of
// `tensor`'s type (the type is first passed through `cast`).
template static bool hasElementType(Value tensor) {
  auto tensorType = cast(tensor.getType());
  Type tensorElementType = tensorType.getElementType();
  return isa(tensorElementType);
}

// Builds a comparison of `lhs` and `rhs`, picking the predicate from `type`:
//   - first `isa` check on `type` succeeds -> `fpred`
//   - IntegerType, unsigned                -> `iupred`
//   - IntegerType, signed                  -> `ispred`
//   - IntegerType, neither (signless)      -> `iupred`, asserted to be 1 bit
// Any other type reaches llvm_unreachable.
// NOTE(review): `fpred`, `iupred` and `ispred` are not declared in the visible
// text; presumably they are the stripped template parameters -- confirm.
template static Value createComparisonTemplate(OpBuilder &b, Location loc,
                                               Type type, Value lhs,
                                               Value rhs) {
  if (isa(type))
    return b.create(loc, fpred, lhs, rhs);
  if (IntegerType intType = dyn_cast(type)) {
    if (intType.isUnsigned())
      return b.create(loc, iupred, lhs, rhs);
    if (intType.isSigned())
      return b.create(loc, ispred, lhs, rhs);
    // Neither signed nor unsigned: only a width of 1 is accepted here.
    assert(intType.getWidth() == 1);
    return b.create(loc, iupred, lhs, rhs);
  }
  llvm_unreachable("Unhandled element type for comparison");
}

// Returns `getZeroPoint()` of the op that defines `value` when the
// `getDefiningOp` lookup succeeds, and a null Value otherwise.
static Value getZeroPoint(Value value) {
  if (auto make = value.getDefiningOp()) {
    return make.getZeroPoint();
  }
  return nullptr;
}

// The six helpers below all forward to createComparisonTemplate with the same
// runtime arguments. NOTE(review): the predicate arguments that distinguish
// them are not visible in this listing.

// Comparison helper: lhs > rhs (per the function name).
static Value createGreaterThan(OpBuilder &b, Location loc, Type elementalType,
                               Value lhs, Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Comparison helper: lhs >= rhs (per the function name).
static Value createGreaterThanOrEqual(OpBuilder &b, Location loc,
                                      Type elementalType, Value lhs,
                                      Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Comparison helper: lhs < rhs (per the function name).
static Value createLessThan(OpBuilder &b, Location loc, Type elementalType,
                            Value lhs, Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Comparison helper: lhs <= rhs (per the function name).
static Value createLessThanOrEqual(OpBuilder &b, Location loc,
                                   Type elementalType, Value lhs, Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Comparison helper: lhs == rhs (per the function name).
static Value createEqual(OpBuilder &b, Location loc, Type elementalType,
                         Value lhs, Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Comparison helper: lhs != rhs (per the function name).
static Value createNotEqual(OpBuilder &b, Location loc, Type elementalType,
                            Value lhs, Value rhs) {
  return createComparisonTemplate(
      b, loc, elementalType, lhs, rhs);
}

// Builds the scalar ops for a normal CDF evaluated at `x`. Going by the local
// names, the emitted chain is 0.5 * (1 + erf((x - mean) / sqrt(2))); the
// created op kinds themselves are not visible in this listing.
// All constants use x's type as their float type.
// NOTE(review): `sigma` is never read in this body, so the result does not
// depend on it. The only caller visible here (buildUnitNormalCdf) passes a
// constant 1.
static Value buildNormalCdf(OpBuilder &b, Location &loc, Value x, Value mean,
                            Value sigma) {
  Type elementType = x.getType();
  Value xMinusMean = b.create(loc, x, mean);
  Value two = b.create(loc, FloatAttr::get(elementType, 2));
  Value sqrt2 = b.create(loc, two);
  Value erfArg = b.create(loc, xMinusMean, sqrt2);
  Value erf = b.create(loc, erfArg);
  Value one = b.create(loc, FloatAttr::get(elementType, 1));
  Value erfPlus1 = b.create(loc, one, erf);
  Value oneHalf = b.create(loc, FloatAttr::get(elementType, 0.5));
  Value normalCdf = b.create(loc, oneHalf, erfPlus1);
  return normalCdf;
}

// Calls buildNormalCdf with constants 0 (mean) and 1 (sigma) of x's type.
static Value buildUnitNormalCdf(OpBuilder &b, Location &loc, Value x) {
  Type elementType = x.getType();
  Value zero = b.create(loc, FloatAttr::get(elementType, 0));
  Value one = b.create(loc, FloatAttr::get(elementType, 1));
  return buildNormalCdf(b, loc, x, zero, one);
}

// Creates a single-operand op on `payloadArg` with dtype conversion around it:
//   1. convert `payloadArg` to `computeTy`, passing operand 0's dtype as the
//      original source dtype;
//   2. create the op on the converted value;
//   3. convert the result to the converted result element type (`outTy`),
//      passing result 0's dtype as the original destination dtype.
// `computeTy` equals `outTy` unless the `isa` check on it succeeds, in which
// case the computation is done in f32.
// NOTE(review): the op kind and the types tested by `isa` are stripped here.
template static Value createFpOpWithDtype(OpBuilder &b,
                                          const TypeConverter *converter,
                                          Value payloadArg, Operation *op) {
  Type inTTy = cast(op->getOperand(0).getType()).getDtype();
  Type outTTy = cast(op->getResult(0).getType()).getDtype();
  Type outTy = cast(converter->convertType(op->getResult(0).getType()))
                   .getElementType();
  Type computeTy = outTy;
  if (isa(computeTy))
    computeTy = b.getF32Type();
  Location loc = op->getLoc();
  Value arg = convertScalarToDtype(b, loc, payloadArg, computeTy, inTTy);
  auto newOp = b.create(loc, arg);
  return convertScalarToDtype(b, loc, newOp, outTy, std::nullopt, outTTy);
}

// Type trait built on std::disjunction; used below as a compile-time predicate
// in static_assert / `if constexpr`.
// NOTE(review): its template parameter list is garbled in this listing.
template struct is_any_same : std::disjunction...> {};

// Emits the comparison for a compare op after bringing `lhs` and `rhs` to a
// common type.
//   - Both operand types must be int-or-float; otherwise an error is emitted
//     on `op` and a null Value is returned.
//   - In the two mixed branches (selected by stripped `isa` checks) one side is
//     converted to the other side's type and `elementalType` is overwritten
//     with that type.
//   - In the remaining branch the narrower operand is converted to the wider
//     operand's type; `elementalType` keeps the dtype read from op.getSelf().
// The final `if constexpr` chain selects one of the create* helpers based on
// the op type.
template static Value createCompareOp(OpBuilder &b, Location loc, OpTy op,
                                      Value lhs, Value rhs) {
  static_assert(
      is_any_same(),
      "unimplemented: op type not supported");

  Type lhsDtype = lhs.getType();
  Type rhsDtype = rhs.getType();
  Type elementalType = cast(op.getSelf().getType()).getDtype();

  if (lhsDtype.isIntOrFloat() && rhsDtype.isIntOrFloat()) {
    if (isa(lhsDtype) && isa(rhsDtype)) {
      rhs = convertScalarToDtype(b, loc, rhs, lhsDtype);
      elementalType = lhsDtype;
    } else if (isa(lhsDtype) && isa(rhsDtype)) {
      lhs = convertScalarToDtype(b, loc, lhs, rhsDtype);
      elementalType = rhsDtype;
    } else {
      // Both are either Integer or Float types, but the bit width might be
      // different.
      if (lhsDtype.getIntOrFloatBitWidth() >
          rhsDtype.getIntOrFloatBitWidth()) {
        rhs = convertScalarToDtype(b, loc, rhs, lhsDtype);
      } else {
        lhs = convertScalarToDtype(b, loc, lhs, rhsDtype);
      }
    }
  } else {
    op.emitError("unimplemented: type promotion from tensor to scalar.");
    return nullptr;
  }

  if constexpr (is_any_same()) {
    return createLessThan(b, loc, elementalType, lhs, rhs);
  }
  if constexpr (is_any_same()) {
    return createLessThanOrEqual(b, loc, elementalType, lhs, rhs);
  }
  if constexpr (is_any_same()) {
    return createGreaterThan(b, loc, elementalType, lhs, rhs);
  }
  if constexpr (is_any_same()) {
    return createGreaterThanOrEqual(b, loc, elementalType, lhs, rhs);
  }
  if constexpr (is_any_same()) {
    return createEqual(b, loc, elementalType, lhs, rhs);
  }
  if constexpr (is_any_same()) {
    return createNotEqual(b, loc, elementalType, lhs, rhs);
  }
  llvm_unreachable("unimplemented: op type not supported");
}

// Payload for a triangular-matrix style op: writes to `result` either
// payloadArgs[0] or a zero constant of the input element type, chosen by a
// predicate that relates the innermost (column) index to the second-innermost
// (row) index plus operands[1] (the diagonal).
// Always returns success(); `op` is not used in the visible body.
// NOTE(review): `predicate` is not declared in the visible text; presumably it
// is the stripped template parameter -- confirm.
template static LogicalResult
createTriangularMatrix(OpBuilder &b, Location loc, ValueRange payloadArgs,
                       Operation *op, ArrayRef operands, Value &result) {
  auto inputType = cast(operands[0].getType());
  uint64_t inputRank = inputType.getRank();

  // Use the indices of the two innermost dimensions.
  auto rowIndex = b.create(loc, inputRank - 2);
  Value rowIndexI64 = castIndexToInt64(b, loc, rowIndex);
  auto colIndex = b.create(loc, inputRank - 1);
  Value colIndexI64 = castIndexToInt64(b, loc, colIndex);

  // columnIndex >= rowIndex + diagonal?
  auto sum = b.create(loc, rowIndexI64, /*diagonal=*/operands[1]);
  auto pred = b.create(loc, predicate, colIndexI64, sum);

  Value scalar = payloadArgs[0];
  Type elementType = inputType.getElementType();
  Value zero = getConstant(b, loc, 0, elementType);
  result = b.create(loc, pred, scalar, zero);
  return success();
}

// Payload for a division op with a rounding mode.
//   - Both operands are converted to the converted result element type; the
//     right-hand side is either operands[1] or payloadArgs[1], chosen by a
//     compile-time `std::is_same_v` test on the op type.
//   - If the `isa` check on the rounding mode's type succeeds, the plain
//     quotient is returned.
//   - A non-constant rounding mode emits an error and returns a null Value.
//   - "trunc": non-float dtypes return the quotient unchanged; for floats the
//     value named `ceil` is selected when quotient < 0 (ULT) and the value
//     named `floor` otherwise.
//   - "floor": floats return the op created on the quotient; integers that are
//     not unsigned are recomputed in f64, passed through the `floor` value and
//     converted back to `dtype`; unsigned integers fall through to the plain
//     quotient.
// NOTE(review): `adaptor` is constructed but not used in the visible body.
// NOTE(review): the rounding mode is only checked with `assert`, so in builds
// without assertions any other string reaches the final `return quotient`.
template Value createDivModePayload(OpBuilder &b, Location loc,
                                    const TypeConverter *converter,
                                    ValueRange payloadArgs, OpT op,
                                    ArrayRef operands) {
  static_assert(std::is_same_v || std::is_same_v,
                "template type must be a tensor/scalar div mode");
  typename OpT::Adaptor adaptor(operands);
  Type dtype = cast(converter->convertType(op.getType()))
                   .getElementType();
  Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype);
  Value rhs = convertScalarToDtype(
      b, loc,
      std::is_same_v ? operands[1] : payloadArgs[1],
      dtype);

  Value quotient;
  if (isa(dtype)) {
    quotient = b.create(loc, lhs, rhs);
  } else if (dtype.isUnsignedInteger()) {
    quotient = b.create(loc, lhs, rhs);
  } else {
    assert(dtype.isInteger() &&
           "dtype should be an integer (signless or signed)");
    quotient = b.create(loc, lhs, rhs);
  }

  if (isa(op.getRoundingMode().getType()))
    return quotient;

  std::string roundingMode;
  if (!matchPattern(op.getRoundingMode(), m_TorchConstantStr(roundingMode))) {
    op.emitError("only support constant str rounding mode");
    return nullptr;
  }
  assert((roundingMode == "trunc" || roundingMode == "floor") &&
         "unsupported rounding mode");
  if (roundingMode == "trunc") {
    // "trunc" - rounds the results of the division towards zero. Equivalent
    // to C-style integer division.
    if (!isa(dtype)) {
      // nothing to do for integers
      return quotient;
    }

    // float
    Value ceil = b.create(loc, quotient);
    Value floor = b.create(loc, quotient);
    Value cstZero = b.create(loc, b.getZeroAttr(dtype));
    Value pred = b.create(loc, arith::CmpFPredicate::ULT, quotient, cstZero);
    return b.create(loc, pred, ceil, floor);
  }
  if (roundingMode == "floor") {
    // "floor" - rounds the results of the division down. Equivalent to
    // floor division in Python (the // operator)
    if (isa(dtype))
      return b.create(loc, quotient);
    if (!dtype.isUnsignedInteger()) {
      Type defaultIntToFloatType = b.getF64Type();
      lhs = convertScalarToDtype(b, loc, lhs, defaultIntToFloatType);
      rhs = convertScalarToDtype(b, loc, rhs, defaultIntToFloatType);
      quotient = b.create(loc, lhs, rhs);
      Value floor = b.create(loc, quotient);
      Value convert = convertScalarToDtype(b, loc, floor, dtype);
      return convert;
    }
  }
  return quotient;
}

// Payload for a remainder op. Both operands are converted to the converted
// result element type; the right-hand side is either operands[1] or
// payloadArgs[1], chosen by a compile-time `std::is_same_v` test on the op
// type. The first branch (taken when the `isa` check on `dtype` succeeds)
// compares with the float predicates ONE / OLT against a zero constant.
// NOTE(review): `adaptor` is constructed but not used in the part of the body
// visible in this block.
template Value createRemainderPayload(OpBuilder &b, Location loc,
                                      const TypeConverter *converter,
                                      ValueRange payloadArgs, OpT op,
                                      ArrayRef operands) {
  static_assert(
      llvm::is_one_of(),
      "op must be a tensor/scalar remainder op");
  typename OpT::Adaptor adaptor(operands);
  Type dtype = cast(converter->convertType(op.getType()))
                   .getElementType();
  Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype);
  Value rhs = convertScalarToDtype(
      b, loc,
      std::is_same_v ? operands[1] : payloadArgs[1],
      dtype);

  // The remainder op we wish to create would look roughly like this:
  // rem = a % b
  // if rem != 0 AND (rem < 0 XOR b < 0) rem += b
  // This is how python calculates remainders for floats and longs:
  // https://github.com/python/cpython/blob/2afd1751dd9a35d4ec03b708e3e5cddd72c43f7e/Objects/floatobject.c#L645
  // https://github.com/python/cpython/blob/2afd1751dd9a35d4ec03b708e3e5cddd72c43f7e/Objects/longobject.c#L3662
  Value result;
  if (isa(dtype)) {
    Value remainder = b.create(loc, lhs, rhs);
    Value zero = b.create(loc, b.getZeroAttr(dtype));
    Value remainderNotEqualToZero = b.create(
        loc, arith::CmpFPredicate::ONE, remainder, zero);
    Value otherLessThanZero =
        b.create(loc, arith::CmpFPredicate::OLT, rhs, zero);
    Value remainderLessThanZero = b.create(
        loc, arith::CmpFPredicate::OLT, remainder, zero);
    Value xorCondition =
        b.create(loc, otherLessThanZero, remainderLessThanZero);
    Value condition =
        b.create(loc, remainderNotEqualToZero, xorCondition);
    // The argument list of this call is completed by the text that follows.
    Value fixedRemainder = b.create(loc, remainder,
rhs); result = b.create(loc, condition, fixedRemainder, remainder); } else { assert(dtype.isInteger() && "dtype should be a float or integer (signless or signed)"); Value remainder = b.create(loc, lhs, rhs); Value zero = b.create(loc, b.getZeroAttr(dtype)); Value remainderNotEqualToZero = b.create(loc, arith::CmpIPredicate::ne, remainder, zero); Value otherLessThanZero = b.create(loc, arith::CmpIPredicate::slt, rhs, zero); Value remainderLessThanZero = b.create( loc, arith::CmpIPredicate::slt, remainder, zero); Value xorCondition = b.create(loc, otherLessThanZero, remainderLessThanZero); Value condition = b.create(loc, remainderNotEqualToZero, xorCondition); Value fixedRemainder = b.create(loc, remainder, rhs); result = b.create(loc, condition, fixedRemainder, remainder); } return result; } static Value createLinalgPayloadCalculationForElementwiseOp( OpBuilder &b, Location loc, const TypeConverter *converter, ValueRange payloadArgs, Operation *op, ArrayRef operands) { if (isa(op)) return b.create(loc, payloadArgs[0]); if (isa(op)) return b.create(loc, payloadArgs[0]); if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, 
converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (isa(op)) { return createFpOpWithDtype(b, converter, payloadArgs[0], op); } if (auto clone = dyn_cast(op)) { int64_t memoryFormat; if (!isa(clone.getMemoryFormat().getType()) && (!matchPattern(clone.getMemoryFormat(), m_TorchConstantInt(&memoryFormat)) || (memoryFormat != torch_upstream::MemoryFormat::Contiguous && memoryFormat != torch_upstream::MemoryFormat::ChannelsLast))) { clone.emitError("unimplemented: only contiguous and channels last memory " "format is supported"); return nullptr; } return payloadArgs[0]; } if (auto bitwiseAndTensor = dyn_cast(op)) { if (isa( cast(bitwiseAndTensor.getType()).getDtype())) { bitwiseAndTensor.emitError( "Bitwise_And does not support floating point dtype"); return nullptr; } Type dtype = cast( converter->convertType(bitwiseAndTensor.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (auto bitwiseAndScalar = dyn_cast(op)) { Type dtype = cast( converter->convertType(bitwiseAndScalar.getType())) .getElementType(); if (!isa(dtype)) { bitwiseAndScalar.emitError( "bitwise_and.Scalar does not support non-integer input dtype."); return nullptr; } Type 
resultElementType = cast(bitwiseAndScalar.getType()).getDtype(); Value self = convertScalarToDtype(b, loc, payloadArgs[0], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value other = convertScalarToDtype(b, loc, operands[1], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); return b.create(loc, self, other); } if (auto bitwiseOrTensor = dyn_cast(op)) { if (isa( cast(bitwiseOrTensor.getType()).getDtype())) { bitwiseOrTensor.emitError( "Bitwise_Or does not support floating point dtype"); return nullptr; } Type dtype = cast( converter->convertType(bitwiseOrTensor.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (auto bitwiseXorTensor = dyn_cast(op)) { if (isa( cast(bitwiseXorTensor.getType()).getDtype())) { bitwiseXorTensor.emitError( "Bitwise_Xor does not support floating point dtype"); return nullptr; } Type dtype = cast( converter->convertType(bitwiseXorTensor.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (auto bitwiseRightShiftTensor = dyn_cast(op)) { Type dtype = cast( converter->convertType(bitwiseRightShiftTensor.getType())) .getElementType(); if (!isa(dtype)) { bitwiseRightShiftTensor.emitError( "Bitwise_Right_Shift op does not support non-integer input dtype."); return nullptr; } Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (auto bitwiseLeftShiftTensor = dyn_cast(op)) { Type dtype = cast( converter->convertType(bitwiseLeftShiftTensor.getType())) .getElementType(); if (!isa(dtype)) { bitwiseLeftShiftTensor.emitError( "Bitwise_Left_Shift op does not support non-integer input 
dtype."); return nullptr; } Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (isa(op)) { MLIRContext *context = op->getContext(); Type floatDtype = mlir::FloatType::getF64(context); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], floatDtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], floatDtype); Value zero = b.create(loc, b.getFloatAttr(floatDtype, 0)); Value lhsTest = createNotEqual(b, loc, floatDtype, lhs, zero); Value rhsTest = createNotEqual(b, loc, floatDtype, rhs, zero); if (isa(op)) { return b.create(loc, lhsTest, rhsTest); } if (isa(op)) { return b.create(loc, lhsTest, rhsTest); } if (isa(op)) { return b.create(loc, lhsTest, rhsTest); } llvm_unreachable("Unknown op type"); } if (isa(op)) { MLIRContext *context = op->getContext(); Type floatDtype = mlir::FloatType::getF64(context); Value self = convertScalarToDtype(b, loc, payloadArgs[0], floatDtype); Value zero = b.create(loc, b.getFloatAttr(floatDtype, 0)); return createEqual(b, loc, floatDtype, self, zero); } if (isa(op)) { if (isa(payloadArgs[0].getType())) return b.create(loc, payloadArgs[0]); return b.create(loc, payloadArgs[0]); } if (isa(op)) { Value abs = b.create(loc, payloadArgs[0]); Value infinity = b.create( loc, b.getFloatAttr(abs.getType(), std::numeric_limits::infinity())); return createEqual(b, loc, abs.getType(), abs, infinity); } if (isa(op)) { Type inTTy = cast(op->getOperand(0).getType()).getDtype(); Type outTTy = cast(op->getResult(0).getType()).getDtype(); Type outTy = cast( converter->convertType(op->getResult(0).getType())) .getElementType(); Type computeTy = outTy; if (isa(computeTy)) computeTy = b.getF32Type(); Value arg = payloadArgs[0]; arg = convertScalarToDtype(b, loc, payloadArgs[0], computeTy, inTTy); auto negate = b.create(loc, arg); auto one = b.create(loc, FloatAttr::get(negate.getType(), 1)); auto exp = b.create(loc, negate); auto 
added = b.create(loc, exp, one); auto div = b.create(loc, one, added); return convertScalarToDtype(b, loc, div, outTy, std::nullopt, outTTy); } if (auto relu = dyn_cast(op)) { Value zeroPoint = getZeroPoint(relu.getSelf()); Value arg = payloadArgs[0]; auto intType = dyn_cast(arg.getType()); if (zeroPoint && !intType) { relu.emitError("unimplemented: non-integer quantized Relu."); return nullptr; } auto reluTorchType = cast(relu.getType()); bool isUnsigned = torch_to_linalg::isUnsignedTorchType(reluTorchType.getDtype()); if (zeroPoint) { int64_t zeroPointInt; int64_t width = intType.getWidth(); assert(width < 64); int64_t minForIntType = isUnsigned ? 0 : -(1 << (width - 1)); int64_t maxForIntType = isUnsigned ? (1 << (width + 1)) - 1 : (1 << (width - 1)) - 1; // check for constant zero point edge-cases: if (matchPattern(zeroPoint, m_TorchConstantInt(&zeroPointInt))) { if (zeroPointInt > maxForIntType) { // TODO: figure out how to handle this case: // current impl. quantizes output like input. // If zero point > maxForIntType, ordinary relu should return 0. // However, 0 isn't represented in such a quantization scheme. 
relu.emitError( "unimplemented: quantized relu for zero-point > max qint"); return nullptr; } if (zeroPointInt < minForIntType) return arg; } zeroPoint = converter->materializeTargetConversion( b, loc, converter->convertType(zeroPoint.getType()), zeroPoint); auto minForIntTypeValue = b.create( loc, b.getIntegerAttr(zeroPoint.getType(), minForIntType)); auto maxForIntTypeValue = b.create( loc, b.getIntegerAttr(zeroPoint.getType(), maxForIntType)); auto zpLtMax = b.create(loc, arith::CmpIPredicate::slt, zeroPoint, maxForIntTypeValue); b.create( loc, zpLtMax, b.getStringAttr("Invalid Quantization: quantized relu with " "zero-point > max qint")); auto zpLtMin = b.create(loc, arith::CmpIPredicate::slt, zeroPoint, minForIntTypeValue); zeroPoint = b.create(loc, zpLtMin, minForIntTypeValue, zeroPoint); zeroPoint = b.create(loc, arg.getType(), zeroPoint); } else { zeroPoint = b.create(loc, b.getZeroAttr(arg.getType())); } Value cmp; if (intType) { auto pred = isUnsigned ? arith::CmpIPredicate::ugt : arith::CmpIPredicate::sgt; cmp = b.create(loc, pred, arg, zeroPoint); } else { cmp = b.create(loc, arith::CmpFPredicate::UGT, arg, zeroPoint); } return b.create(loc, cmp, arg, zeroPoint); } if (auto round = dyn_cast(op)) { if (!isa( cast(round.getType()).getDtype())) { round.emitError("unimplemented: non-floating point dtype"); return nullptr; } return b.create(loc, payloadArgs[0]); } if (auto prelu = dyn_cast(op)) { if (!isa( cast(prelu.getType()).getDtype())) { prelu.emitError("unimplemented: non-floating point dtype"); return nullptr; } Type elementType = payloadArgs[0].getType(); Value constZero = b.create(loc, b.getZeroAttr(elementType)); Value pred = b.create(loc, arith::CmpFPredicate::UGT, payloadArgs[0], constZero); Value positivePart = b.create(loc, pred, payloadArgs[0], constZero); Value negativePart = b.create(loc, pred, constZero, payloadArgs[0]); Value scale = convertScalarToDtype(b, loc, payloadArgs[1], elementType); Value scaledNegativePart = b.create(loc, 
negativePart, scale); return b.create(loc, positivePart, scaledNegativePart); } if (auto gelu = dyn_cast(op)) { if (!isa( cast(gelu.getType()).getDtype())) { gelu.emitError("unimplemented: non-floating point dtype"); return nullptr; } // TODO: Take approximation into account. std::string approximate; if (!matchPattern(gelu.getApproximate(), m_TorchConstantStr(approximate))) { gelu.emitError( "unimplemented: expected approximate to be a constant str"); return nullptr; } if (approximate == "none") { Value multiplier = buildUnitNormalCdf(b, loc, payloadArgs[0]); return b.create(loc, payloadArgs[0], multiplier); } if (approximate == "tanh") { // GELU(x)=0.5∗x∗(1+Tanh((2/π)^1/2 * (x+0.044715∗x^3))) // Ref: https://pytorch.org/docs/stable/generated/torch.nn.GELU.html Value cstThree = b.create( loc, IntegerAttr::get(IntegerType::get(op->getContext(), 64), 3)); Value xCube = b.create(loc, payloadArgs[0], cstThree); Type elementType = payloadArgs[0].getType(); Value cstAlpha = b.create( loc, FloatAttr::get(elementType, 0.044715)); Value xCubeMulAlpha = b.create(loc, xCube, cstAlpha); Value xPlusXCubeMulAlpha = b.create(loc, payloadArgs[0], xCubeMulAlpha); Value cstBeta = b.create( loc, FloatAttr::get(elementType, 0.7977240352174656)); Value betaMulX = b.create(loc, cstBeta, xPlusXCubeMulAlpha); Value tanh = b.create(loc, betaMulX); Value cstOne = b.create(loc, FloatAttr::get(elementType, 1.0)); Value onePlusTanh = b.create(loc, cstOne, tanh); Value cstHalf = b.create(loc, FloatAttr::get(elementType, 0.5)); Value multiplier = b.create(loc, cstHalf, onePlusTanh); return b.create(loc, payloadArgs[0], multiplier); } gelu.emitError("unimplemented: approximate value should be none or tanh"); return nullptr; } if (auto geluBackward = dyn_cast(op)) { if (!isa( cast(geluBackward.getType()).getDtype())) { geluBackward.emitError("unimplemented: non-floating point dtype"); return nullptr; } // TODO: Take approximation into account. 
std::string approximate; if (!matchPattern(geluBackward.getApproximate(), m_TorchConstantStr(approximate)) || approximate != "none") return nullptr; Type elementType = payloadArgs[1].getType(); Value cstAlpha0 = b.create( loc, FloatAttr::get(elementType, 1.12837916709551257390)); Value cstAlpha1 = b.create( loc, FloatAttr::get(elementType, 0.70710678118654752440)); Value oneHalf = b.create(loc, FloatAttr::get(elementType, 0.5)); Value kAlpha = b.create(loc, cstAlpha0, cstAlpha1); Value kAlphaHalf = b.create(loc, kAlpha, oneHalf); Value negOneHalf = b.create(loc, FloatAttr::get(elementType, -0.5)); Value inputSquared = b.create(loc, payloadArgs[1], payloadArgs[1]); Value negHalfInputSquared = b.create(loc, inputSquared, negOneHalf); Value dinput = b.create(loc, negHalfInputSquared); Value cdf = buildUnitNormalCdf(b, loc, payloadArgs[1]); Value dinputInput = b.create(loc, dinput, payloadArgs[1]); Value dinputInputAlpha = b.create(loc, dinputInput, kAlphaHalf); Value cdfExt = b.create(loc, dinputInputAlpha, cdf); return b.create(loc, payloadArgs[0], cdfExt); } if (auto hardtanhBackward = dyn_cast(op)) { AtenHardtanhBackwardOp::Adaptor adaptor(operands); if (!isa( cast(hardtanhBackward.getType()).getDtype())) { hardtanhBackward.emitError("unimplemented: non-floating point dtype"); return nullptr; } Value gradOutput = payloadArgs[0]; Type elementType = gradOutput.getType(); Value self = convertScalarToDtype(b, loc, payloadArgs[1], elementType); Value constantZero = b.create(loc, FloatAttr::get(elementType, 0.0)); Value min = convertScalarToDtype(b, loc, adaptor.getMinVal(), elementType); Value max = convertScalarToDtype(b, loc, adaptor.getMaxVal(), elementType); Value lesser = b.create(loc, arith::CmpFPredicate::ULT, self, min); Value greater = b.create(loc, arith::CmpFPredicate::UGT, self, max); Value cmp = b.create(loc, lesser, greater); return b.create(loc, cmp, constantZero, gradOutput); } if (auto add = dyn_cast(op)) { AtenAddTensorOp::Adaptor adaptor(operands); 
Type resultElementType = cast(add.getType()).getDtype(); Type dtype = cast(converter->convertType(add.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value alpha = convertScalarToDtype(b, loc, adaptor.getAlpha(), dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); if (isa(dtype)) { Value scaled = b.create(loc, rhs, alpha); return b.create(loc, lhs, scaled); } else { Value scaled = b.create(loc, rhs, alpha); return b.create(loc, lhs, scaled); } } if (auto sub = dyn_cast(op)) { AtenSubTensorOp::Adaptor adaptor(operands); Type dtype = cast(converter->convertType(sub.getType())) .getElementType(); Type resultElementType = cast(sub.getType()).getDtype(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value alpha = convertScalarToDtype(b, loc, adaptor.getAlpha(), dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType, /*originalScalar=*/sub.getAlpha()); if (isa(dtype)) { Value scaled = b.create(loc, rhs, alpha); return b.create(loc, lhs, scaled); } else { Value scaled = b.create(loc, rhs, alpha); return b.create(loc, lhs, scaled); } } if (auto subScalar = dyn_cast(op)) { Type dtype = cast(converter->convertType(subScalar.getType())) .getElementType(); Value self = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value other = convertScalarToDtype(b, loc, operands[1], dtype); Value alpha = convertScalarToDtype( b, loc, operands[2], dtype, /*srcOriginalDtype=*/operands[2].getType(), /*dstOriginalDtype=*/dtype); if (isa(dtype)) 
{ Value mult = b.create(loc, other, alpha); return b.create(loc, self, mult); } else if (isa(dtype)) { Value mult = b.create(loc, other, alpha); return b.create(loc, self, mult); } subScalar.emitError("unimplemented: dtype other than float and integer " "types are not supported."); return nullptr; } if (auto addScalar = dyn_cast(op)) { Type dtype = cast(converter->convertType(addScalar.getType())) .getElementType(); Type resultElementType = cast(addScalar.getType()).getDtype(); Value self = convertScalarToDtype(b, loc, payloadArgs[0], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value other = convertScalarToDtype(b, loc, operands[1], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); Value alpha = convertScalarToDtype(b, loc, operands[2], dtype, /*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/resultElementType); if (isa(dtype)) { Value mult = b.create(loc, other, alpha); return b.create(loc, self, mult); } else if (isa(dtype)) { Value mult = b.create(loc, other, alpha); return b.create(loc, self, mult); } addScalar.emitError("unimplemented: dtype other than float and integer " "types are not supported."); return nullptr; } if (auto mul = dyn_cast(op)) { AtenMulTensorOp::Adaptor adaptor(operands); Type dtype = cast(converter->convertType(mul.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); if (isa(dtype)) { return b.create(loc, lhs, rhs); } else if (isa(dtype)) { return b.create(loc, lhs, rhs); } else { return b.create(loc, lhs, rhs); } } if (auto atan2 = dyn_cast(op)) { Type dtype = cast(converter->convertType(atan2.getType())) .getElementType(); if (!isa(dtype)) { atan2.emitError("Atan2 requires floating point result type"); return nullptr; } Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return 
b.create(loc, lhs, rhs); } if (auto ltTensor = dyn_cast(op)) { return createCompareOp(b, loc, ltTensor, payloadArgs[0], payloadArgs[1]); } if (auto leTensor = dyn_cast(op)) { return createCompareOp(b, loc, leTensor, payloadArgs[0], payloadArgs[1]); } if (auto gtTensor = dyn_cast(op)) { return createCompareOp(b, loc, gtTensor, payloadArgs[0], payloadArgs[1]); } if (auto geTensor = dyn_cast(op)) { return createCompareOp(b, loc, geTensor, payloadArgs[0], payloadArgs[1]); } if (auto eqTensor = dyn_cast(op)) { return createCompareOp(b, loc, eqTensor, payloadArgs[0], payloadArgs[1]); } if (auto neTensor = dyn_cast(op)) { return createCompareOp(b, loc, neTensor, payloadArgs[0], payloadArgs[1]); } if (auto div = dyn_cast(op)) { AtenDivTensorOp::Adaptor adaptor(operands); Type dtype = cast(converter->convertType(div.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); if (isa(dtype)) return b.create(loc, lhs, rhs); else if (isa(dtype)) { if (dtype.isUnsignedInteger()) return b.create(loc, lhs, rhs); return b.create(loc, lhs, rhs); } div.emitError("unimplemented: non-floating point and non-integer dtype"); return nullptr; } if (auto divScalarMode = dyn_cast(op)) { return createDivModePayload(b, loc, converter, payloadArgs, divScalarMode, operands); } if (auto divTensorMode = dyn_cast(op)) { return createDivModePayload(b, loc, converter, payloadArgs, divTensorMode, operands); } if (auto pow = dyn_cast(op)) { Type dtype = cast(pow.getType()).getDtype(); if (!isa(dtype)) { pow.emitError("unimplemented: non-floating point dtype"); return nullptr; } Value selfPromoted = convertScalarToDtype(b, loc, operands[0], dtype); Value expPromoted = convertScalarToDtype(b, loc, payloadArgs[0], dtype); return b.create(loc, selfPromoted, expPromoted); } if (auto pow = dyn_cast(op)) { if (!isa( cast(pow.getType()).getDtype())) { pow.emitError("unimplemented: non-floating point dtype"); 
return nullptr; } Type dtype = cast(pow.getSelf().getType()).getDtype(); Value expPromoted = convertScalarToDtype(b, loc, operands[1], dtype); return b.create(loc, payloadArgs[0], expPromoted); } if (auto pow = dyn_cast(op)) { Type dtype = cast(converter->convertType(pow.getType())) .getElementType(); if (!isa(dtype)) { pow.emitError("unimplemented: non-floating point dtype"); return nullptr; } Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); return b.create(loc, lhs, rhs); } if (auto imag = dyn_cast(op)) { Type dtype = cast(converter->convertType(imag.getType())) .getElementType(); if (!isa(dtype)) { imag.emitError("unimplemented: non-floating point dtype"); return nullptr; } Value imagVal = b.create(loc, payloadArgs[0]); return imagVal; } if (auto real = dyn_cast(op)) { Type dtype = cast(converter->convertType(real.getType())) .getElementType(); if (!isa(dtype)) { real.emitError("unimplemented: non-floating point dtype"); return nullptr; } Value realVal = b.create(loc, payloadArgs[0]); return realVal; } if (auto gtScalar = dyn_cast(op)) { return createCompareOp(b, loc, gtScalar, payloadArgs[0], operands[1]); } if (auto geScalar = dyn_cast(op)) { return createCompareOp(b, loc, geScalar, payloadArgs[0], operands[1]); } if (auto eqScalar = dyn_cast(op)) { return createCompareOp(b, loc, eqScalar, payloadArgs[0], operands[1]); } if (auto neScalar = dyn_cast(op)) { return createCompareOp(b, loc, neScalar, payloadArgs[0], operands[1]); } if (auto ltScalar = dyn_cast(op)) { return createCompareOp(b, loc, ltScalar, payloadArgs[0], operands[1]); } if (auto leScalar = dyn_cast(op)) { return createCompareOp(b, loc, leScalar, payloadArgs[0], operands[1]); } if (auto whereSelf = dyn_cast(op)) { Type dtype = cast(converter->convertType(whereSelf.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[1], dtype); Value rhs = convertScalarToDtype(b, loc, payloadArgs[2], 
dtype); return b.create(loc, payloadArgs[0], lhs, rhs); } if (auto lerp = dyn_cast(op)) { if (!isa( cast(lerp.getType()).getDtype())) { lerp.emitError("unimplemented: non-floating point dtype"); return nullptr; } AtenLerpTensorOp::Adaptor adaptor(payloadArgs); auto start = adaptor.getSelf(); auto end = adaptor.getEnd(); auto weight = adaptor.getWeight(); auto delta = b.create(loc, end, start); auto weightedDelta = b.create(loc, delta, weight); return b.create(loc, start, weightedDelta); } if (auto minimum = dyn_cast(op)) { Type dtype = cast(minimum.getType()).getDtype(); Type elemTy = cast(converter->convertType(minimum.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], elemTy); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], elemTy); Value pred = createLessThan(b, loc, dtype, lhs, rhs); return b.create(loc, pred, lhs, rhs); } if (auto maximum = dyn_cast(op)) { Type dtype = cast(maximum.getType()).getDtype(); Type elemTy = cast(converter->convertType(maximum.getType())) .getElementType(); Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], elemTy); Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], elemTy); Value pred = createGreaterThan(b, loc, dtype, lhs, rhs); return b.create(loc, pred, lhs, rhs); } if (auto clamp = dyn_cast(op)) { AtenClampOp::Adaptor adaptor(operands); auto min = adaptor.getMin(); auto max = adaptor.getMax(); if (isa(min.getType()) || isa(max.getType())) { clamp.emitError("unimplemented: runtime optional type"); return nullptr; } Type dtype = cast(converter->convertType(clamp.getType())) .getElementType(); if (!isa(dtype)) { clamp.emitError("unimplement type for clamp"); return nullptr; } Type dstOriginalDtype = cast(clamp.getType()).getDtype(); bool isUnsigned = isa(dstOriginalDtype); if (auto intTy = dyn_cast(dstOriginalDtype)) { isUnsigned = intTy.isUnsigned(); } auto cmpSelect = [&](Value input, Value clamp, bool getMax) -> Value { clamp = convertScalarToDtype(b, loc, clamp, dtype, 
/*srcOriginalDtype=*/std::nullopt, /*dstOriginalDtype=*/dstOriginalDtype); Value pred; if (isa(dtype)) { auto cmp = getMax ? arith::CmpFPredicate::UGT : arith::CmpFPredicate::ULT; pred = b.create(loc, cmp, input, clamp); } else if (isa(dtype)) { auto cmp = isUnsigned ? arith::CmpIPredicate::ult : arith::CmpIPredicate::slt; if (getMax) cmp = arith::invertPredicate(cmp); pred = b.create(loc, cmp, input, clamp); } return b.create(loc, pred, clamp, input); }; auto result = payloadArgs[0]; if (!isa(min.getType())) result = cmpSelect(result, min, /*getMax=*/false); if (!isa(max.getType())) result = cmpSelect(result, max, /*getMax=*/true); return result; } if (auto clampTensor = dyn_cast(op)) { AtenClampTensorOp::Adaptor adaptor(operands); auto min = adaptor.getMin(); auto max = adaptor.getMax(); if (isa(min.getType()) || isa(max.getType())) { clampTensor.emitError("unimplemented: runtime optional type"); return nullptr; } Type dtype = cast