torch-mlir/lib/Dialect/Torch/Transforms/ReduceOpVariants.cpp

485 lines
20 KiB
C++

//===- ReduceOpVariants.cpp --------------------------------------*- C++-*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "ReifyAbstractInterpCalculationsUtils.h"
#include "mlir/Transforms/DialectConversion.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
#include "llvm/ADT/StringExtras.h"
using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::Torch;
// Create an `OverwriteTensorContentsOp` that writes `overwriterTensor` into
// `overwrittenTensor`, in a manner that preserves the
// `OverwriteTensorContentsOp` invariant that both arguments must have the
// same shape and dtype.
//
// `overwrittenTensor` must be of `NonValueTensorType`. When the type of
// `overwriterTensor` differs from the value-semantic form of the overwritten
// tensor's type, a `TensorStaticInfoCastOp` to that type is inserted first.
static void createOverwriteTensorContents(PatternRewriter &rewriter,
                                          Location loc, Value overwriterTensor,
                                          Value overwrittenTensor) {
  Type overwriterTensorType = overwriterTensor.getType();
  // Use `cast` rather than `dyn_cast`: the result is used unconditionally, so
  // a wrong operand type should trip an assertion instead of invoking a
  // method on a null type.
  Type overwrittenTensorType =
      cast<NonValueTensorType>(overwrittenTensor.getType())
          .getWithValueSemantics();
  if (overwriterTensorType != overwrittenTensorType) {
    overwriterTensor = rewriter.create<TensorStaticInfoCastOp>(
        loc, overwrittenTensorType, overwriterTensor);
  }
  rewriter.create<OverwriteTensorContentsOp>(loc, overwriterTensor,
                                             overwrittenTensor);
}
// Return the value-semantic counterpart of `type`.
//
// Handles `NonValueTensorType` directly and recurses through `OptionalType`
// and `ListType` containers. Returns a null `Type` when `type` (or the type
// nested inside a container) is none of these, so callers must check the
// result before using it.
static Type getContainerOrTensorTypeWithValueSemantics(Type type) {
  if (auto optionalType = dyn_cast<OptionalType>(type)) {
    Type newContainedType = getContainerOrTensorTypeWithValueSemantics(
        optionalType.getContainedType());
    // Propagate failure instead of building a container around a null type.
    if (!newContainedType)
      return nullptr;
    return OptionalType::get(newContainedType);
  }
  if (auto listType = dyn_cast<ListType>(type)) {
    Type newContainedType =
        getContainerOrTensorTypeWithValueSemantics(listType.getContainedType());
    // Propagate failure instead of building a container around a null type.
    if (!newContainedType)
      return nullptr;
    return ListType::get(newContainedType);
  }
  if (auto tensorType = dyn_cast<NonValueTensorType>(type))
    return tensorType.getWithValueSemantics();
  return nullptr;
}
static bool
operatorOpHasValueSemantics(OperatorOp opOp,
std::optional<SymbolTable> extraLibrary) {
if (!extraLibrary.has_value())
return false;
auto opName = cast<StringAttr>(opOp->getAttr("name")).getValue();
std::string libFuncName = (mlir::torch::Torch::getLibraryFunctionPrefix(
LibraryFunctionKind::HasValueSemantics) +
Twine(opName))
.str();
auto libFunc = extraLibrary->lookup<func::FuncOp>(libFuncName);
return bool(libFunc);
}
namespace {
// Convert value semantic ops operating on mutable arrays to instead operate on
// immutable tensors.
//
// The pattern matches any op that carries the `HasValueSemantics` trait, or a
// `torch.operator` op that `operatorOpHasValueSemantics` accepts for the
// supplied extra library. The op is updated in place:
//   * non-value tensor operands are replaced by `CopyToValueTensorOp` copies,
//   * lists built by `PrimListConstructOp` and optionals built by
//     `DerefineOp` are rebuilt from value-tensor copies of their inputs,
//   * non-value tensor results are retyped to value tensors and followed by a
//     `CopyToNonValueTensorOp` that takes over all other uses.
class ConvertHasValueSemanticsOpsToValueTensors : public RewritePattern {
public:
  ConvertHasValueSemanticsOpsToValueTensors(
      MLIRContext *context, const std::optional<SymbolTable> &extraLibrary)
      : RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context),
        extraLibrary(extraLibrary) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    if (isa<OperatorOp>(op)) {
      if (!operatorOpHasValueSemantics(cast<OperatorOp>(op), extraLibrary)) {
        return rewriter.notifyMatchFailure(op, "does not have value semantics");
      }
    } else if (!op->hasTrait<Torch::OpTrait::HasValueSemantics>()) {
      return rewriter.notifyMatchFailure(op, "does not have value semantics");
    }

    rewriter.startOpModification(op);
    // Convert all operands.
    for (OpOperand &opOperand : op->getOpOperands()) {
      Type operandType = opOperand.get().getType();
      if (isa<NonValueTensorType>(operandType)) {
        opOperand.set(rewriter.create<CopyToValueTensorOp>(op->getLoc(),
                                                           opOperand.get()));
      } else if (auto listType = dyn_cast<ListType>(operandType)) {
        // Only lists of non-value tensors or of optionals need rewriting.
        if (!(isa<NonValueTensorType>(listType.getContainedType()) ||
              isa<OptionalType>(listType.getContainedType())))
          continue;

        // Construct a new list whose elements are value tensors copied from
        // the non-value tensors of the original list.
        auto listConstruct =
            opOperand.get().getDefiningOp<PrimListConstructOp>();
        if (!listConstruct) {
          rewriter.cancelOpModification(op);
          return rewriter.notifyMatchFailure(
              op, "unimplemented: list of non vtensor type not constructed "
                  "from list construct");
        }

        if (listConstruct.getElements().empty())
          continue;

        // TODO: Handle optional type in list type.
        if (isa<OptionalType>(listType.getContainedType())) {
          // For a list of optionals, only non-value tensor and `None`
          // elements are supported.
          if (!llvm::all_of(listConstruct.getElements(), [](Value val) {
                return isa<NonValueTensorType, Torch::NoneType>(val.getType());
              })) {
            rewriter.cancelOpModification(op);
            return rewriter.notifyMatchFailure(
                op, "unimplemented: list containing optional type is not "
                    "handled.");
          }
        }

        // Copy tensor elements to value tensors; pass other elements through.
        auto newListElements = llvm::to_vector(llvm::map_range(
            listConstruct.getElements(), [&](Value tensor) -> Value {
              if (isa<NonValueTensorType>(tensor.getType())) {
                return rewriter.create<CopyToValueTensorOp>(op->getLoc(),
                                                            tensor);
              }
              return tensor;
            }));

        Type newListType = getContainerOrTensorTypeWithValueSemantics(listType);
        if (!newListType) {
          rewriter.cancelOpModification(op);
          return rewriter.notifyMatchFailure(
              op, "Unable to convert list type to value semantics.");
        }

        opOperand.set(rewriter.create<PrimListConstructOp>(
            op->getLoc(), newListType, newListElements));
      } else if (auto optionalType = dyn_cast<OptionalType>(operandType)) {
        // TODO: A more general way to handle the optional type is to
        // introduce a `copy.to_optional_vtensor` op.
        if (!isa<NonValueTensorType>(optionalType.getContainedType()))
          continue;

        // Create a new optional value whose input is a value tensor copied
        // from the non value tensor of the original optional value.
        auto derefine = opOperand.get().getDefiningOp<DerefineOp>();
        if (!derefine) {
          rewriter.cancelOpModification(op);
          return rewriter.notifyMatchFailure(
              op, "unimplemented: optional of non vtensor type not from "
                  "derefine");
        }

        if (!isa<NonValueTensorType>(derefine.getOperand().getType()))
          continue;
        auto newOperand = rewriter.create<CopyToValueTensorOp>(
            op->getLoc(), derefine.getOperand());
        opOperand.set(rewriter.create<DerefineOp>(
            op->getLoc(), Torch::OptionalType::get(newOperand.getType()),
            newOperand));
      }
    }

    // Convert all results.
    rewriter.setInsertionPointAfter(op);
    for (Value result : op->getResults()) {
      auto tensorType = dyn_cast<NonValueTensorType>(result.getType());
      if (!tensorType)
        continue;
      result.setType(tensorType.getWithValueSemantics());
      auto nonValueTensor =
          rewriter.create<CopyToNonValueTensorOp>(op->getLoc(), result);
      // The copy itself must keep consuming the retyped result.
      result.replaceAllUsesExcept(nonValueTensor, nonValueTensor);
    }
    rewriter.finalizeOpModification(op);
    return success();
  }

private:
  // Symbol table of the optional extra library, consulted to decide whether a
  // `torch.operator` op has value semantics.
  std::optional<SymbolTable> extraLibrary;
};
} // namespace
namespace {
// Conversion pattern that dispatches `torch.operator` ops to handlers
// registered by operator name.
//
// Handlers are plain function pointers stored per name. Several handlers can
// be registered under one name, but `matchAndRewrite` only invokes the first.
class TorchMatchSpecializedBackendOp
    : public OpConversionPattern<Torch::OperatorOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  using HandlerFn = LogicalResult (*)(OperatorOp op,
                                      ConversionPatternRewriter &rewriter);

  // Run the first handler registered for the operator's name; fail when no
  // handler is registered.
  LogicalResult
  matchAndRewrite(Torch::OperatorOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // A single `find` avoids hashing the key twice and copying the handler
    // vector, which `contains` followed by `lookup` would do.
    auto handlerIt = namedHandlers.find(op.getNameAttr());
    if (handlerIt == namedHandlers.end())
      return failure();
    return handlerIt->second.front()(op, rewriter);
  }

  // Register the built-in specialized conversions on `matcher`.
  static void
  populateSpecializedConversions(TorchMatchSpecializedBackendOp &matcher);

  // Create a matcher with all built-in specialized conversions registered.
  static std::unique_ptr<TorchMatchSpecializedBackendOp>
  getPopulatedMatcher(MLIRContext *context) {
    auto matcher = std::make_unique<TorchMatchSpecializedBackendOp>(context);
    populateSpecializedConversions(*matcher);
    return matcher;
  }

  // Register `fn` as a handler for the operator called `name`.
  void populate(StringRef name, HandlerFn fn) {
    namedHandlers[StringAttr::get(getContext(), name)].push_back(fn);
  }

  // Insert the name of every operator that has a registered handler into
  // `set`.
  void populateLegalizedNames(llvm::DenseSet<StringAttr> &set) {
    for (const auto &handle : namedHandlers) {
      set.insert(handle.first);
    }
  }

private:
  // Handlers keyed by operator name.
  DenseMap<StringAttr, SmallVector<HandlerFn, 1>> namedHandlers;
};
// Register the specialized `torch.operator` conversions.
//
// Currently registers a single handler that rewrites
// `torch.aten._scaled_dot_product_flash_attention_for_cpu` into
// `AtenScaledDotProductAttentionOp` when the operator's second result is
// unused.
void TorchMatchSpecializedBackendOp::populateSpecializedConversions(
    TorchMatchSpecializedBackendOp &matcher) {
  matcher.populate(
      "torch.aten._scaled_dot_product_flash_attention_for_cpu",
      [](Torch::OperatorOp op,
         ConversionPatternRewriter &rewriter) -> LogicalResult {
        // `torch.operator` is not arity-checked against the named operator,
        // so validate before indexing operands 0..6 and result 1.
        if (op->getNumOperands() < 7 || op->getNumResults() < 2)
          return rewriter.notifyMatchFailure(
              op, "expected at least 7 operands and 2 results");

        // The replacement op only produces the first result, so the rewrite
        // is valid only when the second result has no uses.
        if (!op.getResult(1).use_empty())
          return rewriter.notifyMatchFailure(
              op, "second result has uses and cannot be dropped");

        // Reorder the operands for the replacement op: operand 5 moves ahead
        // of operands 3 and 4.
        // NOTE(review): presumably this maps (query, key, value, dropout_p,
        // is_causal, attn_mask, scale) onto (query, key, value, attn_mask,
        // dropout_p, is_causal, scale) -- confirm against the ATen schemas.
        auto oldOperands = op->getOperands();
        llvm::SmallVector<Value> newOperands{
            oldOperands[0], oldOperands[1], oldOperands[2], oldOperands[5],
            oldOperands[3], oldOperands[4], oldOperands[6]};
        // The replacement op takes a trailing `enable_gqa` flag; pass false.
        Value enableGQA =
            rewriter.create<ConstantBoolOp>(op->getLoc(), false);
        newOperands.push_back(enableGQA);

        auto newOp = rewriter.create<Torch::AtenScaledDotProductAttentionOp>(
            op.getLoc(), op->getResultTypes()[0], newOperands,
            op->getAttrs());
        rewriter.replaceAllUsesWith(op.getResult(0), newOp.getResult());
        rewriter.eraseOp(op);
        return success();
      });
}
// Report whether `op` is treated as a specialized operation. The current
// implementation answers true for every `torch.operator` op without
// inspecting it.
bool isSpecializedOperation(Torch::OperatorOp /*op*/) {
  return true;
}
} // namespace
// Reduce ops that do not have value semantics and for which no corresponding
// variant without a trailing underscore exists.
namespace {
// Emit ops that compute the number of elements produced by an arange over
// [start, end) with the given step:
//   int(ceil((end - start) / step))
Value calculateArangeResultNumElements(PatternRewriter &rewriter, Location loc,
                                       Value start, Value end, Value step) {
  Type numberType = Torch::NumberType::get(rewriter.getContext());
  Value span = rewriter.create<AtenSubOp>(loc, numberType, end, start);
  Value quotient = rewriter.create<AtenDivOp>(loc, span, step);
  return rewriter.create<AtenCeilFloatOp>(loc, quotient);
}
// Rewrites specific ops that mutate a tensor argument into an op that
// produces a fresh result, followed by an overwrite of the mutated argument.
// Handles `AtenBernoulli_FloatOp` and `AtenArangeStartOutOp`; every other op
// fails to match.
class ReduceNonValueSemanticOps : public RewritePattern {
public:
  ReduceNonValueSemanticOps(MLIRContext *context)
      : RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    if (isa<AtenBernoulli_FloatOp>(op))
      return rewriteBernoulliFloat(op, rewriter);
    if (auto arangeOutOp = dyn_cast<AtenArangeStartOutOp>(op))
      return rewriteArangeStartOut(arangeOutOp, rewriter);
    return failure();
  }

private:
  // Replace the in-place bernoulli op with its `valsem` variant, write the
  // result back into the first operand, and forward that operand as the
  // result of the original op.
  static LogicalResult rewriteBernoulliFloat(Operation *op,
                                             PatternRewriter &rewriter) {
    Location loc = op->getLoc();
    Value self = op->getOperand(0);
    Operation *valsemOp = rewriter.create<ValsemVariantAtenBernoulliFloatOp>(
        loc, op->getResultTypes(), op->getOperands());
    auto valueTensor =
        rewriter.create<CopyToValueTensorOp>(loc, valsemOp->getResult(0));
    createOverwriteTensorContents(rewriter, loc, valueTensor, self);
    rewriter.replaceOp(op, self);
    return success();
  }

  // Replace `arange(start, end, step, out=out)` with a fresh arange that is
  // reshaped to the shape of `out` and then written into `out`.
  static LogicalResult rewriteArangeStartOut(AtenArangeStartOutOp arangeOutOp,
                                             PatternRewriter &rewriter) {
    Location loc = arangeOutOp->getLoc();
    MLIRContext *ctx = arangeOutOp->getContext();
    Type resultType = arangeOutOp.getResult().getType();

    Value start = arangeOutOp.getStart();
    Value end = arangeOutOp.getEnd();
    Value step = arangeOutOp.getStep();
    Value out = arangeOutOp.getOut();

    // `overwrite.tensor.contents` cannot change the tensor shape,
    // so `out` tensor should have same num_elements with result tensor.
    // It means that we don't support code like:
    //   `x = torch.randn(12)`
    //   `y = torch.arange(13, out=x)`
    Value expectedNumel =
        calculateArangeResultNumElements(rewriter, loc, start, end, step);
    Value actualNumel = rewriter.create<AtenNumelOp>(loc, out);
    Value numelMatches =
        rewriter.create<AtenEqIntOp>(loc, expectedNumel, actualNumel);
    // The message text is emitted at runtime and is reproduced verbatim.
    rewriter.create<RuntimeAssertOp>(
        loc, numelMatches,
        rewriter.getStringAttr("`out` tensor should have the same "
                               "num_elements with result tenosr"));

    // Take dtype, device and shape of the new tensor from `out`.
    auto outDtype = rewriter.create<PrimDtypeOp>(loc, out);
    auto outDevice = rewriter.create<PrimDeviceOp>(loc, out);
    auto outShape = rewriter.create<AtenSizeOp>(
        loc, Torch::ListType::get(Torch::IntType::get(ctx)), out);
    auto noneValue = rewriter.create<ConstantNoneOp>(loc);

    Value freshArange = rewriter.create<AtenArangeStartStepOp>(
        loc, resultType, start, end, step, outDtype,
        /*layout=*/noneValue, outDevice, /*pin_memory=*/noneValue);
    Value reshaped =
        rewriter.create<AtenReshapeOp>(loc, resultType, freshArange, outShape);
    auto valueTensor = rewriter.create<CopyToValueTensorOp>(loc, reshaped);
    createOverwriteTensorContents(rewriter, loc, valueTensor, out);
    rewriter.replaceOp(arangeOutOp, out);
    return success();
  }
};
} // namespace
namespace {
// Rewrites an op carrying the `IsTrailingUnderscoreInplaceVariant` trait into
// the op of the same name without the trailing underscore, then overwrites
// the original first operand ("self") with the converted result.
class ReduceTrailingUnderscoreInplaceVariant : public RewritePattern {
public:
  ReduceTrailingUnderscoreInplaceVariant(MLIRContext *context)
      : RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    if (!op->hasTrait<Torch::OpTrait::IsTrailingUnderscoreInplaceVariant>())
      return rewriter.notifyMatchFailure(op, "is not trailing_ variant");

    // Derive the target op name by dropping the trailing underscore from the
    // third dot-separated component of the op name.
    SmallVector<StringRef> nameParts;
    llvm::SplitString(op->getName().getStringRef(), nameParts, ".");
    assert(nameParts.size() >= 3 && nameParts[2].ends_with("_") &&
           "IsTrailingUnderscoreInplaceVariant incorrectly applied");
    nameParts[2] = nameParts[2].drop_back();
    std::string valueVariantName = llvm::join(nameParts, ".");

    Location loc = op->getLoc();
    Value self = op->getOperand(0);

    // Build the replacement generically with the same result types, operands
    // and attributes.
    // Note: No successors or regions. Torch JIT operators don't have any.
    assert(op->getNumRegions() == 0 && op->getNumSuccessors() == 0 &&
           "Torch JIT operators shouldn't have regions or successors");
    OperationState valueVariantState(loc, valueVariantName);
    valueVariantState.addTypes(op->getResultTypes());
    valueVariantState.addOperands(op->getOperands());
    valueVariantState.addAttributes(op->getAttrDictionary().getValue());
    Operation *valueVariantOp = rewriter.create(valueVariantState);
    Value valueVariantResult = valueVariantOp->getResult(0);

    // Note: need to convert result to first input's dtype because mix precision
    // compute would result in different behaviors.
    // For example:
    //  a = torch.randn(3, 3).half() # float16
    //  b = torch.randn(3, 3)        # float32
    //  a += b # i.e. torch.ops.aten.add_(a, b), result is float16
    //  c = a + b # i.e. torch.ops.aten.add(a, b), result is float32
    Value noneValue = rewriter.create<ConstantNoneOp>(loc);
    Value falseValue = rewriter.create<ConstantBoolOp>(loc, false);
    auto selfDtype = rewriter.create<PrimDtypeOp>(loc, self);
    auto converted = rewriter.create<AtenToDtypeOp>(
        loc, valueVariantResult.getType(), valueVariantResult, selfDtype,
        /*non_blocking=*/falseValue, /*copy=*/falseValue,
        /*memory_format=*/noneValue);

    // Write the converted result back into "self" and forward "self" as the
    // result of the original op.
    auto valueTensor = rewriter.create<CopyToValueTensorOp>(loc, converted);
    createOverwriteTensorContents(rewriter, loc, valueTensor, self);
    rewriter.replaceOp(op, self);
    return success();
  }
};
} // namespace
// Replace a `NonValueTensorLiteralOp` with a `ValueTensorLiteralOp` holding
// the same value, copied back to the original op's type via
// `copyTensorToType`.
static LogicalResult
reduceNonValueTensorLiteralOpToValueTensorLiteralOp(NonValueTensorLiteralOp op,
                                                    PatternRewriter &rewriter) {
  Location loc = op->getLoc();
  Value literal = rewriter.create<ValueTensorLiteralOp>(loc, op.getValue());
  Value replacement = copyTensorToType(rewriter, loc, op.getType(), literal);
  rewriter.replaceOp(op, replacement);
  return success();
}
namespace {
// Pass that applies the op-variant reduction patterns of this file through a
// partial dialect conversion.
struct ReduceOpVariantsPass
    : public ReduceOpVariantsBase<ReduceOpVariantsPass> {
  ReduceOpVariantsPass() = default;
  ReduceOpVariantsPass(StringRef extraLibrary) {
    this->extraLibrary = extraLibrary.str();
  }

  void runOnOperation() override {
    MLIRContext *context = &getContext();
    RewritePatternSet patterns(context);

    // Load the optional extra library. The owning module stays alive until
    // the end of this function, covering every use of the symbol table.
    OwningOpRef<ModuleOp> libraryModule =
        ModuleOp::create(UnknownLoc::get(context));
    std::optional<SymbolTable> librarySymbols = std::nullopt;
    if (!extraLibrary.empty()) {
      if (failed(loadExtraLibrary(extraLibrary, libraryModule))) {
        emitError(getOperation()->getLoc(),
                  "Failed to load extra-library file at " + extraLibrary);
        return signalPassFailure();
      }
      librarySymbols = SymbolTable(libraryModule->getOperation());
    }

    patterns.add<ConvertHasValueSemanticsOpsToValueTensors>(context,
                                                            librarySymbols);
    patterns.add<ReduceTrailingUnderscoreInplaceVariant>(context);
    patterns.add(reduceNonValueTensorLiteralOpToValueTensorLiteralOp);
    patterns.add<ReduceNonValueSemanticOps>(context);

    // Create specialized matcher:
    auto specializedMatcher =
        TorchMatchSpecializedBackendOp::getPopulatedMatcher(context);
    DenseSet<StringAttr> specializedNames;
    specializedMatcher->populateLegalizedNames(specializedNames);
    patterns.insert(std::move(specializedMatcher));

    ConversionTarget target(*context);
    target.addIllegalOp<NonValueTensorLiteralOp>();
    target.addIllegalOp<AtenBernoulli_FloatOp>();
    target.addIllegalOp<AtenArangeStartOutOp>();
    target.markUnknownOpDynamicallyLegal([&librarySymbols,
                                          &specializedNames](Operation *op) {
      auto operatorOp = dyn_cast<OperatorOp>(op);

      // Operators with a specialized handler must always be rewritten.
      if (operatorOp && specializedNames.contains(operatorOp.getNameAttr()))
        return false;

      // A value-semantic op is legal once none of its operand or result
      // types is a non-value tensor.
      const bool isValueSemantic =
          op->hasTrait<Torch::OpTrait::HasValueSemantics>() ||
          (operatorOp &&
           operatorOpHasValueSemantics(operatorOp, librarySymbols));
      if (isValueSemantic) {
        // TODO: Make this an allowlist based on a closed torch dialect
        // type system.
        auto isNotNonValueTensor = [](Type t) {
          return !isa<NonValueTensorType>(t);
        };
        return llvm::all_of(op->getOperandTypes(), isNotNonValueTensor) &&
               llvm::all_of(op->getResultTypes(), isNotNonValueTensor);
      }

      if (op->hasTrait<Torch::OpTrait::IsTrailingUnderscoreInplaceVariant>())
        return false;

      if (operatorOp && isSpecializedOperation(operatorOp))
        return false;
      return true;
    });

    if (failed(applyPartialConversion(getOperation(), target,
                                      std::move(patterns))))
      signalPassFailure();
  }
};
} // namespace
// Factory for the pass; `extraLibrary` is forwarded to the pass constructor.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::torch::Torch::createReduceOpVariantsPass(StringRef extraLibrary) {
  std::unique_ptr<OperationPass<func::FuncOp>> pass =
      std::make_unique<ReduceOpVariantsPass>(extraLibrary);
  return pass;
}