//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//

#include "torch-mlir/Conversion/TorchToLinalg/TorchToLinalg.h"

#include "PopulatePatterns.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Matchers.h"
#include "torch-mlir/Conversion/TorchToLinalg/Utils.h"
#include "torch-mlir/Conversion/Utils/Utils.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
#include "torch-mlir/Dialect/Torch/Utils/TorchUpstream.h"
#include "torch-mlir/Dialect/TorchConversion/IR/TorchConversionOps.h"

using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::Torch;

namespace {
// TODO: Dropout should probably be handled in DecomposeComplexOps instead of
// here.
class ConvertAtenDropoutOp : public OpConversionPattern<AtenDropoutOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(AtenDropoutOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
      return failure();

    bool train;
    if (!matchPattern(op.getTrain(), m_TorchConstantBool(&train)))
      return rewriter.notifyMatchFailure(op,
                                         "Expected train to be constant bool.");

    if (train)
      return failure();
    auto resultType = cast<RankedTensorType>(
        getTypeConverter()->convertType(op->getResult(0).getType()));
    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, resultType,
                                                adaptor.getInput());
    return success();
  }
};
} // namespace
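
// Flattens a multi-dimensional index into the linear offset of a row-major
// buffer via Horner's rule: for indices (i, j, k) and shape (A, B, C) this
// computes (i * B + j) * C + k.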
static Value toLinearIndex(OpBuilder &b, Location loc,
                           ArrayRef<Value> indicesIntValues,
                           ArrayRef<Value> shapeIntValues) {
  assert(indicesIntValues.size() == shapeIntValues.size() &&
         "Expected `indices` and `shape` to have the same size");
  Value result =
      b.create<arith::ConstantOp>(loc, b.getZeroAttr(b.getI64Type()));
  for (auto [index, stride] : llvm::zip(indicesIntValues, shapeIntValues)) {
    assert(isa<mlir::IntegerType>(index.getType()) &&
           isa<mlir::IntegerType>(stride.getType()) &&
           "Input arrays to `toLinearIndex` must only contain values of type "
           "`mlir::IntegerType`");
    Value mul = b.create<arith::MulIOp>(loc, result, stride);
    result = b.create<arith::AddIOp>(loc, mul, index);
  }
  return result;
}

// Squares64 Algorithm for generating 64-bit random numbers.
// See: https://arxiv.org/abs/2004.06278
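// The generator is counter-based rather than stateful: each (ctr, key) pair
// maps deterministically to a 64-bit value, so callers below can use an
// element's linear index as the counter and a fresh global seed as the key.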
static Value randomUniformUInt(OpBuilder &b, Location loc, Value ctr,
                               Value key) {
  auto mul = [&](Value lhs, Value rhs) -> Value {
    return b.create<arith::MulIOp>(loc, lhs, rhs);
  };
  auto add = [&](Value lhs, Value rhs) -> Value {
    return b.create<arith::AddIOp>(loc, lhs, rhs);
  };
  Value cst32 = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(32));
  auto shiftRight32 = [&](Value val) -> Value {
    return b.create<arith::ShRUIOp>(loc, val, cst32);
  };
  auto swapLoHi = [&](Value val) -> Value {
    Value leftShift = b.create<arith::ShLIOp>(loc, val, cst32);
    Value rightShift = shiftRight32(val);
    return b.create<arith::OrIOp>(loc, leftShift, rightShift);
  };
  auto bitwiseXOr = [&](Value lhs, Value rhs) -> Value {
    return b.create<arith::XOrIOp>(loc, lhs, rhs);
  };

  Value t, x, y, z;
  x = mul(ctr, key);
  y = x;
  z = add(y, key);
  x = add(mul(x, x), y);
  x = swapLoHi(x);
  x = add(mul(x, x), z);
  x = swapLoHi(x);
  x = add(mul(x, x), y);
  x = swapLoHi(x);
  t = x = add(mul(x, x), z);
  x = swapLoHi(x);
  return bitwiseXOr(t, shiftRight32(add(mul(x, x), y)));
}

// Generates a uniform random Float64 in the half-open range [min, max).
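// The scale constant 5.4210108e-20 used below is approximately
// 1 / (2^64 - 1), i.e. 1 / RAND_MAX for a full 64-bit generator; for example,
// min = 0.0 and max = 1.0 maps the whole uint64 range (up to rounding)
// onto [0, 1).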
static Value randomUniformF64(OpBuilder &b, Location loc, Value ctr, Value key,
                              Value min, Value max) {
  Value randomVal = randomUniformUInt(b, loc, ctr, key);
  // scale = (max - min) * const(F64, 5.4210108E-20)
  // which is derived from rand(min, max) =
  // min + rand() / (RAND_MAX / (max - min)) where RAND_MAX = 2^64 - 1.
  Value epsilon = b.create<arith::ConstantOp>(
      loc, b.getFloatAttr(b.getF64Type(), 5.4210108E-20));
  Value range = b.create<arith::SubFOp>(loc, max, min);
  Value scale = b.create<arith::MulFOp>(loc, range, epsilon);
  // res = cast(F64, randomVal) * scale + min
  Value updateFloat = b.create<arith::UIToFPOp>(loc, b.getF64Type(), randomVal);
  Value updateScaled = b.create<arith::MulFOp>(loc, updateFloat, scale);
  Value uniformSample = b.create<arith::AddFOp>(loc, updateScaled, min);

  return uniformSample;
}

namespace {
class ConvertAtenUniformOp : public OpConversionPattern<AtenUniformOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(AtenUniformOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
      return failure();
    Location loc = op.getLoc();
    Value self = adaptor.getSelf();
    Value from = adaptor.getFrom();
    Value to = adaptor.getTo();
    Value generator = adaptor.getGenerator();
    RankedTensorType resultType = cast<RankedTensorType>(self.getType());
    Type elemTy = resultType.getElementType();
    Type f64Ty = rewriter.getF64Type();

    if (!isa<mlir::FloatType>(elemTy))
      return rewriter.notifyMatchFailure(op,
                                         "This op only supports float types");

    if (!isa<Torch::NoneType>(generator.getType()))
      return rewriter.notifyMatchFailure(
          op, "The generator has to be None because only the global default "
              "generator is supported");

    // Get the key, min, and max used by the `linalg.generic` compute payload.
    Value key = rewriter.create<TorchConversion::GetNextSeedOp>(loc);
    Value min = convertScalarToDtype(rewriter, loc, from, f64Ty);
    Value max = convertScalarToDtype(rewriter, loc, to, f64Ty);

    // Construct the `linalg.generic` op.
    auto resultRank = resultType.getRank();
    SmallVector<AffineMap, 1> indexingMaps(
        1, rewriter.getMultiDimIdentityMap(resultRank));
    SmallVector<utils::IteratorType> iteratorTypes(
        resultRank, utils::IteratorType::parallel);
    SmallVector<Value> sizes = getTensorSizes(rewriter, loc, self);
    SmallVector<Value> sizesIntValues =
        castIndexVectorToInt64Vector(rewriter, loc, sizes);
    Value initTensor =
        rewriter.create<tensor::EmptyOp>(loc, getAsOpFoldResult(sizes), elemTy);
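    // Each output element computes its own sample: its row-major linear index
    // serves as the RNG counter, so for a fixed seed the result does not
    // depend on how the parallel iteration space is executed.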
    Value uniformRes =
        rewriter
            .create<linalg::GenericOp>(
                loc, initTensor.getType(), /*inputs=*/ValueRange{},
                /*outputs=*/initTensor, indexingMaps, iteratorTypes,
                [&](OpBuilder &b, Location loc, ValueRange args) {
                  SmallVector<Value> indicesIntValues;
                  for (int i = 0; i < resultRank; i++) {
                    indicesIntValues.push_back(castIndexToInt64(
                        b, loc, b.create<linalg::IndexOp>(loc, i)));
                  }

                  Value linearIndex =
                      toLinearIndex(b, loc, indicesIntValues, sizesIntValues);

                  Value res =
                      randomUniformF64(b, loc, linearIndex, key, min, max);
                  Value truncRes = res;
                  if (isa<Float16Type, Float32Type>(elemTy))
                    truncRes = b.create<arith::TruncFOp>(loc, elemTy, res);
                  b.create<linalg::YieldOp>(loc, truncRes);
                })
            .getResult(0);

    Type newResultType = getTypeConverter()->convertType(op.getType());
    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, uniformRes);
    return success();
  }
};
} // namespace

namespace {
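// Lowers aten.multinomial (sampling with replacement) by building, for each
// distribution, a normalized CDF and then binary-searching it with one
// uniform sample per output element.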
class ConvertAtenMultinomialOp
    : public OpConversionPattern<AtenMultinomialOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(AtenMultinomialOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
      return failure();
    Location loc = op.getLoc();
    Value self = adaptor.getSelf();
    Value numSamples = adaptor.getNumSamples();
    Value generator = adaptor.getGenerator();
    RankedTensorType selfType = cast<RankedTensorType>(self.getType());
    Type elemTy = selfType.getElementType();
    Type f64Ty = rewriter.getF64Type();
    Type i64Ty = rewriter.getI64Type();
    Type indexTy = rewriter.getIndexType();
    int64_t inputRank = selfType.getRank();
    bool bReplacement;

    if (!isa<mlir::FloatType>(elemTy))
      return rewriter.notifyMatchFailure(op,
                                         "This op only supports float types");

    if (!mlir::isa<Torch::NoneType>(generator.getType()))
      return rewriter.notifyMatchFailure(
          op, "The generator has to be None because only the global default "
              "generator is supported");

    if (!matchPattern(op.getReplacement(), m_TorchConstantBool(&bReplacement)))
      return rewriter.notifyMatchFailure(
          op, "Unsupported: replacement must be a boolean value");

    if (!bReplacement)
      return rewriter.notifyMatchFailure(op,
                                         "Unimplemented: replacement = False");

    if (!mlir::isa<mlir::IntegerType>(numSamples.getType())) {
      return rewriter.notifyMatchFailure(
          op, "Unsupported: num_samples must be an integer value");
    }

    if (!(inputRank == 1 || inputRank == 2)) {
      return rewriter.notifyMatchFailure(
          op, "torch.multinomial accepts only rank 1 or 2 tensors as weights");
    }

    Value cstZero = rewriter.create<arith::ConstantOp>(
        loc, i64Ty, rewriter.getI64IntegerAttr(0));
    Value cstOne = rewriter.create<arith::ConstantOp>(
        loc, i64Ty, rewriter.getI64IntegerAttr(1));
    Value zeroIndex = rewriter.create<arith::ConstantIndexOp>(loc, 0);
    Value oneIndex = rewriter.create<arith::ConstantIndexOp>(loc, 1);
    Value numSamplesIndex =
        rewriter.create<arith::IndexCastOp>(loc, indexTy, numSamples);

    Value numDistributions;
    Value numCategoriesIndex;
    ValueRange resultShape;
    if (inputRank == 1) {
      numDistributions = cstOne;
      numCategoriesIndex =
          rewriter.create<tensor::DimOp>(loc, indexTy, self, zeroIndex);
      resultShape = ValueRange{numSamplesIndex};
    } else {
      Value numDistIndex =
          rewriter.create<tensor::DimOp>(loc, indexTy, self, zeroIndex);
      numCategoriesIndex =
          rewriter.create<tensor::DimOp>(loc, indexTy, self, oneIndex);
      numDistributions =
          rewriter.create<arith::IndexCastOp>(loc, i64Ty, numDistIndex);
      resultShape = ValueRange{numDistIndex, numSamplesIndex};
    }

    Value numCategories =
        rewriter.create<arith::IndexCastOp>(loc, i64Ty, numCategoriesIndex);
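    // torch.multinomial returns category indices, hence the i64 element type.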
    Value resultTensor = rewriter.create<tensor::EmptyOp>(
        loc, getAsOpFoldResult(resultShape), i64Ty);

    // Sum the weights for normalization.
    torch_to_linalg::ReductionOpInfo opInfo;
    if (inputRank == 1)
      opInfo = {false, self, {0}};
    else
      opInfo = {false, self, {1}};

    Value initSum = rewriter.create<arith::ConstantOp>(
        loc, f64Ty, rewriter.getF64FloatAttr(0.0));
    int64_t srcWidth = cast<mlir::FloatType>(elemTy).getWidth();
    if (srcWidth > 64)
      op->emitWarning("Op bitwidth will be truncated from " +
                      std::to_string(srcWidth) + " bits to 64 bits.");
    auto sumBody = [&](OpBuilder &b, Location loc, ValueRange payloadArgs) {
      Value input = payloadArgs[0];
      if (srcWidth < 64)
        input = b.create<arith::ExtFOp>(loc, f64Ty, input);
      if (srcWidth > 64)
        input = b.create<arith::TruncFOp>(loc, f64Ty, input);
      Value result = payloadArgs[1];
      Value nextSum = b.create<arith::AddFOp>(loc, input, result);
      b.create<linalg::YieldOp>(loc, nextSum);
    };
    Value sumWeights = torch_to_linalg::createReductionLinalgGeneric(
        rewriter, loc, opInfo, initSum, sumBody);
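
    // The weights need not sum to 1: each weight is divided by this total when
    // the CDF is built below, which normalizes the distribution.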

    // Get multinomial samples for each weight vector.
    auto multinomialComputation = [&](OpBuilder &b, Location loc, Value j,
                                      ValueRange args) {
      Value jIndex = b.create<arith::IndexCastOp>(loc, indexTy, j);

      Value sum;
      if (inputRank == 1) {
        sum = b.create<tensor::ExtractOp>(loc, sumWeights, ValueRange{});
      } else {
        sum = b.create<tensor::ExtractOp>(loc, sumWeights, ValueRange{jIndex});
      }

      // Compute the CDF in a loop.
      Value initCdf = b.create<tensor::EmptyOp>(
          loc, getAsOpFoldResult(ValueRange{numCategoriesIndex}), f64Ty);
      Value cdf =
          b.create<scf::ForOp>(
               loc, cstZero, numCategories, cstOne, ValueRange{initCdf},
               [&](OpBuilder &b, Location loc, Value i, ValueRange vals) {
                 Value distribution = vals[0];
                 // if (i > 0)
                 auto comparisonPredicate = arith::CmpIPredicateAttr::get(
                     b.getContext(), arith::CmpIPredicate::sgt);
                 Value condition = b.create<arith::CmpIOp>(
                     loc, comparisonPredicate, i, cstZero);
                 Value iIndex = b.create<arith::IndexCastOp>(loc, indexTy, i);
                 // cdf[i] = i > 0 ? mass[i] + cdf[i - 1] : mass[i]
                 ValueRange ind;
                 if (inputRank == 1) {
                   ind = ValueRange{iIndex};
                 } else {
                   ind = ValueRange{jIndex, iIndex};
                 }
                 Value currWeight = b.create<tensor::ExtractOp>(loc, self, ind);
                 if (srcWidth < 64)
                   currWeight = b.create<arith::ExtFOp>(loc, f64Ty, currWeight);
                 if (srcWidth > 64)
                   currWeight =
                       b.create<arith::TruncFOp>(loc, f64Ty, currWeight);
                 Value currMass = b.create<arith::DivFOp>(loc, currWeight, sum);
                 Value currCum =
                     b.create<scf::IfOp>(
                          loc, condition,
                          [&](OpBuilder &b, Location loc) {
                            Value prevI =
                                b.create<arith::SubIOp>(loc, i, cstOne);
                            Value prevIndex = b.create<arith::IndexCastOp>(
                                loc, indexTy, prevI);
                            Value prevMass = b.create<tensor::ExtractOp>(
                                loc, distribution, ValueRange{prevIndex});
                            Value currSum = b.create<arith::AddFOp>(
                                loc, currMass, prevMass);
                            b.create<scf::YieldOp>(loc, ValueRange(currSum));
                          },
                          [&](OpBuilder &b, Location loc) {
                            b.create<scf::YieldOp>(loc, ValueRange{currMass});
                          })
                         .getResult(0);

                 Value updatedCdf = b.create<tensor::InsertOp>(
                     loc, currCum, distribution, ValueRange(iIndex));
                 b.create<scf::YieldOp>(loc, ValueRange(updatedCdf));
               })
              .getResult(0);

      /*
       * Above we've computed the CDF for the unnormalized distribution given
       * to us by the user. To actually sample from this distribution we:
       *   1) Sample a random floating point value r in [0, 1) from a uniform
       *      distribution.
       *   2) Binary-search the CDF for the smallest index i with cdf[i] >= r.
       * This yields a sample from the provided distribution with the
       * appropriate probabilities.
       *
       * This logic is pulled straight from PyTorch's multinomial kernel:
       * https://github.com/pytorch/pytorch/blob/e4623de4cf6097ff399aa9eb0cef44b44ca76da4/aten/src/ATen/native/cpu/MultinomialKernel.cpp#L23
       */

      // Get the key, min, and max used by the RNG.
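      // A fresh seed is drawn per distribution and the sample loop index below
      // serves as the RNG counter, so samples do not reuse a (ctr, key) pair.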
      Value key = b.create<TorchConversion::GetNextSeedOp>(loc);
      Value min = b.create<arith::ConstantOp>(loc, f64Ty,
                                              rewriter.getF64FloatAttr(0.0));
      Value max = b.create<arith::ConstantOp>(loc, f64Ty,
                                              rewriter.getF64FloatAttr(1.0));

      // Iterate and sample class indices.
      Value result = args[0];
      Value finalResult =
          rewriter
              .create<scf::ForOp>(
                  loc, cstZero, numSamples, cstOne, ValueRange{result},
                  [&](OpBuilder &b, Location loc, Value i, ValueRange args) {
                    // Sample a random float.
                    Value uniformSample =
                        randomUniformF64(b, loc, i, key, min, max);

                    // Binary-search the CDF to find our sample.
                    Value left = b.create<arith::ConstantOp>(
                        loc, i64Ty, b.getI64IntegerAttr(0));
                    Value right = numCategories;

                    auto checkCondition = [&](OpBuilder &b, Location loc,
                                              ValueRange vals) {
                      Value left = vals[0];
                      Value right = vals[1];

                      // while (right > left)
                      auto comparisonPredicate = arith::CmpIPredicateAttr::get(
                          b.getContext(), arith::CmpIPredicate::sgt);
                      Value loopCondition = b.create<arith::CmpIOp>(
                          loc, comparisonPredicate, right, left);
                      b.create<scf::ConditionOp>(loc, loopCondition, vals);
                    };

                    ValueRange whileResults =
                        b.create<scf::WhileOp>(
                             loc, TypeRange{i64Ty, i64Ty},
                             ValueRange{left, right}, checkCondition,
                             [&](OpBuilder &b, Location loc, ValueRange vals) {
                               Value left = vals[0];
                               Value right = vals[1];

                               Value two = b.create<arith::ConstantOp>(
                                   loc, i64Ty, b.getI64IntegerAttr(2));
                               Value diff =
                                   b.create<arith::SubIOp>(loc, right, left);
                               Value diffMid =
                                   b.create<arith::DivSIOp>(loc, diff, two);
                               Value midPointer =
                                   b.create<arith::AddIOp>(loc, left, diffMid);
                               Type indexTy = b.getIndexType();
                               Value midIndex = b.create<arith::IndexCastOp>(
                                   loc, indexTy, midPointer);

                               // Branch and update the search bounds.
                               auto thenBlock = [&](OpBuilder &b,
                                                    Location loc) {
                                 // left = mid + 1
                                 Value newLeft = b.create<arith::AddIOp>(
                                     loc, midPointer, cstOne);

                                 b.create<scf::YieldOp>(
                                     loc, ValueRange{newLeft, right});
                               };
                               auto elseBlock = [&](OpBuilder &b,
                                                    Location loc) {
                                 // right = mid
                                 b.create<scf::YieldOp>(
                                     loc, ValueRange{left, midPointer});
                               };

                               Value cumProb = b.create<tensor::ExtractOp>(
                                   loc, cdf, ValueRange{midIndex});
                               auto cmpPredicate =
                                   arith::CmpFPredicateAttr::get(
                                       b.getContext(),
                                       arith::CmpFPredicate::OLT);
                               Value branchCondition = b.create<arith::CmpFOp>(
                                   loc, cmpPredicate, cumProb, uniformSample);
                               ValueRange branchResults =
                                   b.create<scf::IfOp>(loc, branchCondition,
                                                       thenBlock, elseBlock)
                                       .getResults();
                               Value newLeft = branchResults[0];
                               Value newRight = branchResults[1];

                               b.create<scf::YieldOp>(
                                   loc, ValueRange{newLeft, newRight});
                             })
                            .getResults();
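
                    // On exit, left == right is the smallest index with
                    // cdf[index] >= uniformSample: the sampled category.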
                    // sample_idx = left_pointer
                    Value samplePointer = whileResults[0];
                    Value iIndex =
                        b.create<arith::IndexCastOp>(loc, indexTy, i);

                    Value prevResult = args[0];
                    Value newResult;
                    if (inputRank == 1) {
                      // result[i] = sample_idx
                      newResult = b.create<tensor::InsertOp>(
                          loc, samplePointer, prevResult, ValueRange{iIndex});
                    } else {
                      // result[j][i] = sample_idx
                      newResult = b.create<tensor::InsertOp>(
                          loc, samplePointer, prevResult,
                          ValueRange{jIndex, iIndex});
                    }

                    b.create<scf::YieldOp>(loc, ValueRange{newResult});
                  })
              .getResult(0);

      b.create<scf::YieldOp>(loc, ValueRange{finalResult});
    };

    Value finalResultTensor =
        rewriter
            .create<scf::ForOp>(loc, cstZero, numDistributions, cstOne,
                                ValueRange{resultTensor},
                                multinomialComputation)
            .getResult(0);

    Type newResultType = getTypeConverter()->convertType(op.getType());
    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType,
                                                finalResultTensor);

    return success();
  }
};
} // namespace
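
// Registers the conversion patterns defined above and marks the corresponding
// Torch ops illegal so the conversion driver must rewrite them.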
void mlir::torch::torch_to_linalg::populateRandomPatternsAndLegality(
    TypeConverter &typeConverter, RewritePatternSet &patterns,
    ConversionTarget &target) {
  MLIRContext *context = patterns.getContext();
  target.addIllegalOp<AtenDropoutOp>();
  patterns.add<ConvertAtenDropoutOp>(typeConverter, context);
  target.addIllegalOp<AtenUniformOp>();
  patterns.add<ConvertAtenUniformOp>(typeConverter, context);
  target.addIllegalOp<AtenMultinomialOp>();
  patterns.add<ConvertAtenMultinomialOp>(typeConverter, context);
}