mirror of https://github.com/llvm/torch-mlir
485 lines
20 KiB
C++
485 lines
20 KiB
C++
//===- ReduceOpVariants.cpp --------------------------------------*- C++-*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PassDetail.h"
|
|
|
|
#include "ReifyAbstractInterpCalculationsUtils.h"
|
|
#include "mlir/Transforms/DialectConversion.h"
|
|
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
|
|
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::torch;
|
|
using namespace mlir::torch::Torch;
|
|
|
|
// Create an overwrite in a manner that preserves the
|
|
// `OverwriteTensorContentsOp` invariant that both arguments
|
|
// must have the same shape and dtype.
|
|
static void createOverwriteTensorContents(PatternRewriter &rewriter,
|
|
Location loc, Value overwriterTensor,
|
|
Value overwrittenTensor) {
|
|
Type overwriterTensorType = overwriterTensor.getType();
|
|
Type overwrittenTensorType =
|
|
dyn_cast<NonValueTensorType>(overwrittenTensor.getType())
|
|
.getWithValueSemantics();
|
|
if (overwriterTensorType != overwrittenTensorType) {
|
|
overwriterTensor = rewriter.create<TensorStaticInfoCastOp>(
|
|
loc, overwrittenTensorType, overwriterTensor);
|
|
}
|
|
rewriter.create<OverwriteTensorContentsOp>(loc, overwriterTensor,
|
|
overwrittenTensor);
|
|
}
|
|
|
|
// Returns the value-semantic counterpart of `type`:
//   * a `NonValueTensorType` maps to its value tensor type;
//   * an `OptionalType` / `ListType` is rebuilt around the converted contained
//     type.
// Returns a null `Type` for every other type, and also for a container whose
// contained type cannot be converted, so callers can test the result with
// `if (!result)`.
static Type getContainerOrTensorTypeWithValueSemantics(Type type) {
  if (auto tensorType = dyn_cast<NonValueTensorType>(type))
    return tensorType.getWithValueSemantics();

  if (auto optionalType = dyn_cast<OptionalType>(type)) {
    Type newContainedType = getContainerOrTensorTypeWithValueSemantics(
        optionalType.getContainedType());
    // Propagate failure instead of building an optional around a null type.
    if (!newContainedType)
      return nullptr;
    return OptionalType::get(newContainedType);
  }

  if (auto listType = dyn_cast<ListType>(type)) {
    Type newContainedType =
        getContainerOrTensorTypeWithValueSemantics(listType.getContainedType());
    // Propagate failure instead of building a list around a null type.
    if (!newContainedType)
      return nullptr;
    return ListType::get(newContainedType);
  }

  return nullptr;
}
|
|
|
|
static bool
|
|
operatorOpHasValueSemantics(OperatorOp opOp,
|
|
std::optional<SymbolTable> extraLibrary) {
|
|
if (!extraLibrary.has_value())
|
|
return false;
|
|
auto opName = cast<StringAttr>(opOp->getAttr("name")).getValue();
|
|
std::string libFuncName = (mlir::torch::Torch::getLibraryFunctionPrefix(
|
|
LibraryFunctionKind::HasValueSemantics) +
|
|
Twine(opName))
|
|
.str();
|
|
auto libFunc = extraLibrary->lookup<func::FuncOp>(libFuncName);
|
|
return bool(libFunc);
|
|
}
|
|
|
|
namespace {
|
|
// Convert value semantic ops operating on mutable arrays to instead operate on
|
|
// immutable tensors.
|
|
class ConvertHasValueSemanticsOpsToValueTensors : public RewritePattern {
|
|
public:
|
|
ConvertHasValueSemanticsOpsToValueTensors(
|
|
MLIRContext *context, const std::optional<SymbolTable> &extraLibrary)
|
|
: RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {
|
|
this->extraLibrary = extraLibrary;
|
|
}
|
|
LogicalResult matchAndRewrite(Operation *op,
|
|
PatternRewriter &rewriter) const override {
|
|
if (isa<OperatorOp>(op)) {
|
|
if (!operatorOpHasValueSemantics(cast<OperatorOp>(op), extraLibrary)) {
|
|
return rewriter.notifyMatchFailure(op, "does not have value semantics");
|
|
}
|
|
} else if (!op->hasTrait<Torch::OpTrait::HasValueSemantics>()) {
|
|
return rewriter.notifyMatchFailure(op, "does not have value semantics");
|
|
}
|
|
|
|
rewriter.startOpModification(op);
|
|
// Convert all operands.
|
|
SmallVector<Value> newOperands;
|
|
for (OpOperand &opOperand : op->getOpOperands()) {
|
|
Type operandType = opOperand.get().getType();
|
|
if (isa<NonValueTensorType>(operandType)) {
|
|
opOperand.set(rewriter.create<CopyToValueTensorOp>(op->getLoc(),
|
|
opOperand.get()));
|
|
} else if (auto listType = dyn_cast<ListType>(operandType)) {
|
|
if (!(isa<NonValueTensorType>(listType.getContainedType()) ||
|
|
isa<OptionalType>(listType.getContainedType())))
|
|
continue;
|
|
|
|
// Construct a new list whose elements are value tensors copied from
|
|
// the non-value tensors of the original list.
|
|
auto listConstruct =
|
|
opOperand.get().getDefiningOp<PrimListConstructOp>();
|
|
if (!listConstruct) {
|
|
rewriter.cancelOpModification(op);
|
|
return rewriter.notifyMatchFailure(
|
|
op, "unimplemented: list of non vtensor type not constructed "
|
|
"from list construct");
|
|
}
|
|
|
|
if (listConstruct.getElements().empty())
|
|
continue;
|
|
|
|
// TODO: Handle optional type in list type.
|
|
if (auto optionalType =
|
|
dyn_cast<OptionalType>(listType.getContainedType())) {
|
|
if (!llvm::all_of(listConstruct.getElements(), [](Value val) {
|
|
return isa<NonValueTensorType, Torch::NoneType>(val.getType());
|
|
})) {
|
|
rewriter.cancelOpModification(op);
|
|
return rewriter.notifyMatchFailure(
|
|
op, "unimplemented: list containing optional type is not "
|
|
"handled.");
|
|
}
|
|
}
|
|
|
|
auto newListElements = llvm::to_vector(llvm::map_range(
|
|
listConstruct.getElements(), [&](Value tensor) -> Value {
|
|
if (isa<NonValueTensorType>(tensor.getType())) {
|
|
return rewriter.create<CopyToValueTensorOp>(op->getLoc(),
|
|
tensor);
|
|
}
|
|
return tensor;
|
|
}));
|
|
|
|
Type newListType = getContainerOrTensorTypeWithValueSemantics(listType);
|
|
if (!newListType) {
|
|
rewriter.cancelOpModification(op);
|
|
return rewriter.notifyMatchFailure(
|
|
op, "Unable to convert list type to value semantics.");
|
|
}
|
|
opOperand.set(rewriter.create<PrimListConstructOp>(
|
|
op->getLoc(), newListType, newListElements));
|
|
} else if (auto optionalType = dyn_cast<OptionalType>(operandType)) {
|
|
// TODO: A more general way to handle the optional type is to
|
|
// introduce a `copy.to_optional_vtensor` op.
|
|
if (!isa<NonValueTensorType>(optionalType.getContainedType()))
|
|
continue;
|
|
|
|
// Create a new optional value whose input is a value tensor copied
|
|
// from the non value tensor of the original optional value.
|
|
auto derefine = opOperand.get().getDefiningOp<DerefineOp>();
|
|
if (!derefine) {
|
|
rewriter.cancelOpModification(op);
|
|
return rewriter.notifyMatchFailure(
|
|
op, "unimplemented: optional of non vtensor type not from "
|
|
"derefine");
|
|
}
|
|
|
|
if (!isa<NonValueTensorType>(derefine.getOperand().getType()))
|
|
continue;
|
|
auto newOperand = rewriter.create<CopyToValueTensorOp>(
|
|
op->getLoc(), derefine.getOperand());
|
|
opOperand.set(rewriter.create<DerefineOp>(
|
|
op->getLoc(), Torch::OptionalType::get(newOperand.getType()),
|
|
newOperand));
|
|
}
|
|
}
|
|
// Convert all results.
|
|
rewriter.setInsertionPointAfter(op);
|
|
for (Value result : op->getResults()) {
|
|
auto tensorType = dyn_cast<NonValueTensorType>(result.getType());
|
|
if (!tensorType)
|
|
continue;
|
|
result.setType(tensorType.getWithValueSemantics());
|
|
auto nonValueTensor =
|
|
rewriter.create<CopyToNonValueTensorOp>(op->getLoc(), result);
|
|
result.replaceAllUsesExcept(nonValueTensor, nonValueTensor);
|
|
}
|
|
rewriter.finalizeOpModification(op);
|
|
return success();
|
|
}
|
|
|
|
private:
|
|
std::optional<SymbolTable> extraLibrary;
|
|
};
|
|
} // namespace
|
|
|
|
namespace {
|
|
|
|
class TorchMatchSpecializedBackendOp
|
|
: public OpConversionPattern<Torch::OperatorOp> {
|
|
public:
|
|
using OpConversionPattern::OpConversionPattern;
|
|
|
|
using HandlerFn = LogicalResult (*)(OperatorOp op,
|
|
ConversionPatternRewriter &rewriter);
|
|
|
|
LogicalResult
|
|
matchAndRewrite(Torch::OperatorOp op, OpAdaptor adaptor,
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
|
|
if (namedHandlers.contains(op.getNameAttr())) {
|
|
return namedHandlers.lookup(op.getNameAttr()).front()(op, rewriter);
|
|
}
|
|
|
|
return failure();
|
|
}
|
|
|
|
static void
|
|
populateSpecializedConversions(TorchMatchSpecializedBackendOp &matcher);
|
|
|
|
static std::unique_ptr<TorchMatchSpecializedBackendOp>
|
|
getPopulatedMatcher(MLIRContext *context) {
|
|
auto matcher = std::make_unique<TorchMatchSpecializedBackendOp>(context);
|
|
populateSpecializedConversions(*matcher);
|
|
return matcher;
|
|
};
|
|
|
|
void populate(StringRef name, HandlerFn fn) {
|
|
namedHandlers[StringAttr::get(getContext(), name)].push_back(fn);
|
|
}
|
|
|
|
void populateLegalizedNames(llvm::DenseSet<StringAttr> &set) {
|
|
for (auto handle : namedHandlers) {
|
|
set.insert(handle.first);
|
|
}
|
|
}
|
|
|
|
private:
|
|
DenseMap<StringAttr, SmallVector<HandlerFn, 1>> namedHandlers;
|
|
};
|
|
|
|
// Registers the specialized `torch.operator` conversions on `matcher`.
//
// Currently handles `torch.aten._scaled_dot_product_flash_attention_for_cpu`:
// when its second result is unused, the op is replaced by an
// `AtenScaledDotProductAttentionOp` built from the reordered operands plus a
// constant `false` appended as the last operand.
void TorchMatchSpecializedBackendOp::populateSpecializedConversions(
    TorchMatchSpecializedBackendOp &matcher) {
  matcher.populate(
      "torch.aten._scaled_dot_product_flash_attention_for_cpu",
      [](Torch::OperatorOp op,
         ConversionPatternRewriter &rewriter) -> LogicalResult {
        // `torch.operator` is variadic, so validate the arity before indexing
        // operands 0..6 and result 1 below.
        if (op->getNumOperands() < 7 || op->getNumResults() < 2)
          return rewriter.notifyMatchFailure(
              op, "expected at least 7 operands and 2 results");

        // The replacement op only produces the first result, so the rewrite
        // is valid only when the second result has no uses.
        if (!op.getResult(1).use_empty())
          return failure();

        // Reorder operands for the replacement op.
        // NOTE(review): presumably this moves the attention mask (operand 5)
        // ahead of dropout_p / is_causal to match the SDPA signature; confirm
        // against the op definitions.
        auto oldOperands = op->getOperands();
        llvm::SmallVector<Value> newOperands{
            oldOperands[0], oldOperands[1], oldOperands[2], oldOperands[5],
            oldOperands[3], oldOperands[4], oldOperands[6]};
        Value enableGQA = rewriter.create<ConstantBoolOp>(op->getLoc(), false);
        newOperands.push_back(enableGQA);

        auto newOp = rewriter.create<Torch::AtenScaledDotProductAttentionOp>(
            op.getLoc(), op->getResultTypes()[0], newOperands, op->getAttrs());
        rewriter.replaceAllUsesWith(op.getResult(0), newOp.getResult());
        rewriter.eraseOp(op);
        return success();
      });
}
|
|
|
|
// Reports whether `op` is treated as a specialized operation. The answer is
// currently unconditional: every `OperatorOp` is reported as specialized and
// `op` itself is not inspected.
bool isSpecializedOperation(Torch::OperatorOp op) {
  return true;
}
|
|
} // namespace
|
|
|
|
// Reduce ops that lack value semantics and for which no corresponding variant
// without a trailing underscore exists.
|
|
namespace {
|
|
|
|
// int(ceil((end - start) / step))
|
|
Value calculateArangeResultNumElements(PatternRewriter &rewriter, Location loc,
|
|
Value start, Value end, Value step) {
|
|
Value sub = rewriter.create<AtenSubOp>(
|
|
loc, Torch::NumberType::get(rewriter.getContext()), end, start);
|
|
Value div = rewriter.create<AtenDivOp>(loc, sub, step);
|
|
return rewriter.create<AtenCeilFloatOp>(loc, div);
|
|
}
|
|
|
|
class ReduceNonValueSemanticOps : public RewritePattern {
|
|
public:
|
|
ReduceNonValueSemanticOps(MLIRContext *context)
|
|
: RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {}
|
|
LogicalResult matchAndRewrite(Operation *op,
|
|
PatternRewriter &rewriter) const override {
|
|
Location loc = op->getLoc();
|
|
MLIRContext *ctx = op->getContext();
|
|
if (isa<AtenBernoulli_FloatOp>(op)) {
|
|
Operation *newOp = rewriter.create<ValsemVariantAtenBernoulliFloatOp>(
|
|
loc, op->getResultTypes(), op->getOperands());
|
|
auto tensor =
|
|
rewriter.create<CopyToValueTensorOp>(loc, newOp->getResult(0));
|
|
createOverwriteTensorContents(rewriter, loc, tensor, op->getOperand(0));
|
|
rewriter.replaceOp(op, op->getOperand(0));
|
|
return success();
|
|
} else if (auto arangeOutOp = dyn_cast<AtenArangeStartOutOp>(op)) {
|
|
Value start = arangeOutOp.getStart();
|
|
Value end = arangeOutOp.getEnd();
|
|
Value step = arangeOutOp.getStep();
|
|
Value out = arangeOutOp.getOut();
|
|
|
|
// `overwrite.tensor.contents` cannot change the tensor shape,
|
|
// so `out` tensor should have same num_elements with result tensor.
|
|
// It means that we don't support code like:
|
|
// `x = torch.randn(12)`
|
|
// `y = torch.arange(13, out=x)`
|
|
Value resultNumElements =
|
|
calculateArangeResultNumElements(rewriter, loc, start, end, step);
|
|
Value outNumElements = rewriter.create<AtenNumelOp>(loc, out);
|
|
Value eqOrNot =
|
|
rewriter.create<AtenEqIntOp>(loc, resultNumElements, outNumElements);
|
|
rewriter.create<RuntimeAssertOp>(
|
|
loc, eqOrNot,
|
|
rewriter.getStringAttr("`out` tensor should have the same "
|
|
"num_elements with result tenosr"));
|
|
|
|
auto dtype = rewriter.create<PrimDtypeOp>(loc, out);
|
|
auto device = rewriter.create<PrimDeviceOp>(loc, out);
|
|
auto shape = rewriter.create<AtenSizeOp>(
|
|
loc, Torch::ListType::get(Torch::IntType::get(ctx)), out);
|
|
auto none = rewriter.create<ConstantNoneOp>(loc);
|
|
Value newArange = rewriter.create<AtenArangeStartStepOp>(
|
|
loc, arangeOutOp.getResult().getType(), start, end, step, dtype,
|
|
/*layout=*/none, device, /*pin_memory=*/none);
|
|
Value reshape = rewriter.create<AtenReshapeOp>(
|
|
loc, arangeOutOp.getResult().getType(), newArange, shape);
|
|
|
|
auto vtensor = rewriter.create<CopyToValueTensorOp>(loc, reshape);
|
|
createOverwriteTensorContents(rewriter, loc, vtensor, out);
|
|
rewriter.replaceOp(arangeOutOp, out);
|
|
return success();
|
|
} else {
|
|
return failure();
|
|
}
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
namespace {
|
|
// Reduce the "trailing underscore inplace variant" to the value semantic
|
|
// variant + an overwrite of the original "self" argument.
|
|
class ReduceTrailingUnderscoreInplaceVariant : public RewritePattern {
|
|
public:
|
|
ReduceTrailingUnderscoreInplaceVariant(MLIRContext *context)
|
|
: RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {}
|
|
LogicalResult matchAndRewrite(Operation *op,
|
|
PatternRewriter &rewriter) const override {
|
|
if (!op->hasTrait<Torch::OpTrait::IsTrailingUnderscoreInplaceVariant>())
|
|
return rewriter.notifyMatchFailure(op, "is not trailing_ variant");
|
|
|
|
SmallVector<StringRef> fragments;
|
|
llvm::SplitString(op->getName().getStringRef(), fragments, ".");
|
|
assert(fragments.size() >= 3 && fragments[2].ends_with("_") &&
|
|
"IsTrailingUnderscoreInplaceVariant incorrectly applied");
|
|
fragments[2] = fragments[2].drop_back();
|
|
std::string noUnderscoreName = llvm::join(fragments, ".");
|
|
|
|
OperationState state(op->getLoc(), noUnderscoreName);
|
|
state.addTypes(op->getResultTypes());
|
|
state.addOperands(op->getOperands());
|
|
state.addAttributes(op->getAttrDictionary().getValue());
|
|
// Note: No successors or regions. Torch JIT operators don't have any.
|
|
assert(op->getNumRegions() == 0 && op->getNumSuccessors() == 0 &&
|
|
"Torch JIT operators shouldn't have regions or successors");
|
|
|
|
Operation *newOp = rewriter.create(state);
|
|
// Note: need to convert result to first input's dtype because mix precision
|
|
// compute would result in different behaviors.
|
|
// For example:
|
|
// a = torch.randn(3, 3).half() # float16
|
|
// b = torch.randn(3, 3) # float32
|
|
// a += b # i.e. torch.ops.aten.add_(a, b), result is float16
|
|
// c = a + b # i.e. torch.ops.aten.add(a, b), result is float32
|
|
Value none = rewriter.create<ConstantNoneOp>(op->getLoc());
|
|
Value cstFalse = rewriter.create<ConstantBoolOp>(op->getLoc(), false);
|
|
auto aDtype = rewriter.create<PrimDtypeOp>(op->getLoc(), op->getOperand(0));
|
|
auto toDtype = rewriter.create<AtenToDtypeOp>(
|
|
op->getLoc(), newOp->getResult(0).getType(), newOp->getResult(0),
|
|
aDtype, /*non_blocking=*/cstFalse, /*copy=*/cstFalse,
|
|
/*memory_format=*/none);
|
|
auto tensor = rewriter.create<CopyToValueTensorOp>(op->getLoc(), toDtype);
|
|
createOverwriteTensorContents(rewriter, op->getLoc(), tensor,
|
|
op->getOperand(0));
|
|
rewriter.replaceOp(op, op->getOperand(0));
|
|
|
|
return success();
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
// Replaces a `NonValueTensorLiteralOp` with a `ValueTensorLiteralOp` holding
// the same value, passed through `copyTensorToType` with the original op's
// result type. Always succeeds.
static LogicalResult
reduceNonValueTensorLiteralOpToValueTensorLiteralOp(NonValueTensorLiteralOp op,
                                                    PatternRewriter &rewriter) {
  Location loc = op->getLoc();
  Value literal = rewriter.create<ValueTensorLiteralOp>(loc, op.getValue());
  Value converted = copyTensorToType(rewriter, loc, op.getType(), literal);
  rewriter.replaceOp(op, converted);
  return success();
}
|
|
|
|
namespace {
|
|
struct ReduceOpVariantsPass
|
|
: public ReduceOpVariantsBase<ReduceOpVariantsPass> {
|
|
ReduceOpVariantsPass() = default;
|
|
ReduceOpVariantsPass(StringRef extraLibrary) {
|
|
this->extraLibrary = extraLibrary.str();
|
|
}
|
|
void runOnOperation() override {
|
|
MLIRContext *context = &getContext();
|
|
RewritePatternSet patterns(context);
|
|
OwningOpRef<ModuleOp> extraLibraryModule =
|
|
ModuleOp::create(UnknownLoc::get(context));
|
|
std::optional<SymbolTable> extraLibraryModuleSymTable = std::nullopt;
|
|
if (!extraLibrary.empty()) {
|
|
if (failed(loadExtraLibrary(extraLibrary, extraLibraryModule))) {
|
|
emitError(getOperation()->getLoc(),
|
|
"Failed to load extra-library file at " + extraLibrary);
|
|
return signalPassFailure();
|
|
}
|
|
|
|
extraLibraryModuleSymTable =
|
|
SymbolTable(extraLibraryModule->getOperation());
|
|
}
|
|
patterns.add<ConvertHasValueSemanticsOpsToValueTensors>(
|
|
context, extraLibraryModuleSymTable);
|
|
patterns.add<ReduceTrailingUnderscoreInplaceVariant>(context);
|
|
patterns.add(reduceNonValueTensorLiteralOpToValueTensorLiteralOp);
|
|
patterns.add<ReduceNonValueSemanticOps>(context);
|
|
|
|
// Create specialized matcher:
|
|
auto specialized =
|
|
TorchMatchSpecializedBackendOp::getPopulatedMatcher(context);
|
|
DenseSet<StringAttr> specializedNames;
|
|
specialized->populateLegalizedNames(specializedNames);
|
|
patterns.insert(std::move(specialized));
|
|
|
|
ConversionTarget target(*context);
|
|
target.addIllegalOp<NonValueTensorLiteralOp>();
|
|
target.addIllegalOp<AtenBernoulli_FloatOp>();
|
|
target.addIllegalOp<AtenArangeStartOutOp>();
|
|
target.markUnknownOpDynamicallyLegal([&extraLibraryModuleSymTable,
|
|
&specializedNames](Operation *op) {
|
|
if (isa<OperatorOp>(op)) {
|
|
if (specializedNames.contains(cast<OperatorOp>(op).getNameAttr())) {
|
|
return false;
|
|
}
|
|
}
|
|
if (op->hasTrait<Torch::OpTrait::HasValueSemantics>() ||
|
|
(isa<OperatorOp>(op) &&
|
|
operatorOpHasValueSemantics(cast<OperatorOp>(op),
|
|
extraLibraryModuleSymTable))) {
|
|
auto hasValueSemantics = [](Type t) {
|
|
// TODO: Make this an allowlist based on a closed torch dialect
|
|
// type system.
|
|
if (auto tensorType = dyn_cast<NonValueTensorType>(t)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
};
|
|
return llvm::all_of(op->getOperandTypes(), hasValueSemantics) &&
|
|
llvm::all_of(op->getResultTypes(), hasValueSemantics);
|
|
}
|
|
if (op->hasTrait<Torch::OpTrait::IsTrailingUnderscoreInplaceVariant>()) {
|
|
return false;
|
|
}
|
|
|
|
if (isa<OperatorOp>(op) && isSpecializedOperation(cast<OperatorOp>(op)))
|
|
return false;
|
|
return true;
|
|
});
|
|
|
|
if (failed(applyPartialConversion(getOperation(), target,
|
|
std::move(patterns)))) {
|
|
return signalPassFailure();
|
|
}
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
std::unique_ptr<OperationPass<func::FuncOp>>
|
|
mlir::torch::Torch::createReduceOpVariantsPass(StringRef extraLibrary) {
|
|
return std::make_unique<ReduceOpVariantsPass>(extraLibrary);
|
|
}
|