//===- Bufferize.cpp - Bufferization for TCP dialect ------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Transforms/Bufferize.h"
#include "mlir/Transforms/DialectConversion.h"
#include "npcomp/Dialect/Refback/IR/RefbackDialect.h"
#include "npcomp/Dialect/Refback/IR/RefbackOps.h"
#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
#include "npcomp/Dialect/TCP/IR/TCPOps.h"
#include "npcomp/Dialect/TCP/Transforms/Passes.h"

using namespace mlir;
using namespace mlir::NPCOMP;

// TODO: Don't just open-code all shape transfer functions here.
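// Computes the result shapes of `op` directly from its operands (rather than
// from its result tensors), materializing any needed index computations at
// `op`'s insertion point. Returns an empty vector if no shape transfer
// function is known for `op`.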
static SmallVector<Value, 6> bypassResultShapes(Operation &op) {
  OpBuilder builder(&op);

  if (auto broadcastTo = dyn_cast<tcp::BroadcastToOp>(op)) {
    return {broadcastTo.shape()};
  }

  if (auto splatted = dyn_cast<tcp::SplattedOp>(op)) {
    return {splatted.shape()};
  }

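  // For tcp.pad, each output dimension is the operand's extent plus the
  // lower and upper expansion amounts along that dimension.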
  if (auto pad = dyn_cast<tcp::PadOp>(op)) {
    SmallVector<Value, 6> outDims;
    auto inputType = pad.operand().getType().cast<RankedTensorType>();
    for (int i = 0, e = inputType.getRank(); i < e; i++) {
      auto dimIndex = builder.create<ConstantIndexOp>(op.getLoc(), i);
      auto lowerExpansion =
          builder.create<tensor::ExtractOp>(op.getLoc(), pad.lowerExpansion(),
                                            ValueRange({dimIndex}));
      auto upperExpansion =
          builder.create<tensor::ExtractOp>(op.getLoc(), pad.upperExpansion(),
                                            ValueRange({dimIndex}));
      auto operandDim =
          builder.create<memref::DimOp>(op.getLoc(), pad.operand(), i);
      auto totalExpansion =
          builder.create<AddIOp>(op.getLoc(), lowerExpansion, upperExpansion);
      auto outDim =
          builder.create<AddIOp>(op.getLoc(), totalExpansion, operandDim);
      outDims.push_back(outDim);
    }
    Value outDimTensor = builder.create<tensor::FromElementsOp>(
        op.getLoc(), ValueRange(outDims));
    return {outDimTensor};
  }

  // No shape transfer function.
  return {};
}
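
// Allocates a refback.alloc_memref for each tensor result of `op`, using the
// result shapes computed by bypassResultShapes. If `resultShapesOut` is
// non-null, the computed shapes are appended to it.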
static FailureOr<SmallVector<Value, 6>>
allocateResults(Operation *op, ConversionPatternRewriter &rewriter,
                Location loc,
                SmallVectorImpl<Value> *resultShapesOut = nullptr) {
  auto resultShapes = bypassResultShapes(*op);
  SmallVector<Value, 6> results;
  for (auto t : llvm::zip(op->getResults(), resultShapes)) {
    auto result = std::get<0>(t);
    auto resultShape = std::get<1>(t);
    auto tensorType = result.getType().cast<RankedTensorType>();
    auto memrefType =
        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
    auto memref =
        rewriter.create<refback::AllocMemRefOp>(loc, memrefType, resultShape);
    results.push_back(memref);
  }
  if (resultShapesOut)
    resultShapesOut->append(resultShapes.begin(), resultShapes.end());
  return results;
}

namespace {
// TODO: Lower to a "buffer version" of tcp::BroadcastTo instead of directly to
// loops.
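//
// A sketch of the generated structure for a rank-2 broadcast (assembly
// syntax abbreviated; %e0/%e1 stand for the output extents extracted from
// the shape operand):
//
//   %result = refback.alloc_memref(%shape)
//   scf.for %i = %c0 to %e0 step %c1 {
//     scf.for %j = %c0 to %e1 step %c1 {
//       // load from the input, clamping indices of broadcast dimensions
//       // to 0; store to %result at [%i, %j]
//     }
//   }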
class LowerBroadcastToToLoopsPattern
    : public OpConversionPattern<tcp::BroadcastToOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(tcp::BroadcastToOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto resultType = op.getType().cast<RankedTensorType>();
    auto inputType = op.operand().getType().cast<RankedTensorType>();
    SmallVector<Value, 6> resultShapes;
    auto resultsOrFailure =
        allocateResults(op, rewriter, op.getLoc(), &resultShapes);
    if (failed(resultsOrFailure))
      return failure();
    Value resultMemref = (*resultsOrFailure)[0];
    auto resultShape = resultShapes[0];
    Value inputMemref = operands[0];

    SmallVector<Value, 6> outputExtents;
    for (int i = 0, e = resultType.getRank(); i < e; i++) {
      Value dimIndex = rewriter.create<ConstantIndexOp>(op.getLoc(), i);
      Value outputExtent = rewriter.create<tensor::ExtractOp>(
          op.getLoc(), resultShape, ValueRange({dimIndex}));
      outputExtents.push_back(outputExtent);
    }
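    // The input aligns with the trailing dimensions of the result; an input
    // dimension requires broadcasting when its extent differs from the
    // corresponding output extent.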
|
|
|
|
int rankDiff = resultType.getRank() - inputType.getRank();
|
|
|
|
SmallVector<Value, 6> inputDimRequiresBroadcasting;
|
|
|
|
for (int i = 0, e = inputType.getRank(); i < e; i++) {
|
|
|
|
// Calculate the relevant extents.
|
2021-03-24 05:16:23 +08:00
|
|
|
Value inputExtent =
|
|
|
|
rewriter.create<memref::DimOp>(op.getLoc(), op.operand(), i);
|
2020-09-17 08:31:40 +08:00
|
|
|
inputDimRequiresBroadcasting.push_back(
|
|
|
|
rewriter.create<CmpIOp>(op.getLoc(), CmpIPredicate::ne, inputExtent,
|
|
|
|
outputExtents[rankDiff + i]));
|
|
|
|
}

    {
      OpBuilder::InsertionGuard guard(rewriter);
      Value c0 = rewriter.create<ConstantIndexOp>(op.getLoc(), 0);
      Value c1 = rewriter.create<ConstantIndexOp>(op.getLoc(), 1);

      SmallVector<Value, 6> inductionVariables;
      // Create the (perfectly nested) loops.
      // Loop invariant: At the start of iteration `i`, the rewriter insertion
      // point is inside `i` nested loops.
      for (int i = 0, e = resultType.getRank(); i < e; i++) {
        auto loop = rewriter.create<scf::ForOp>(
            op.getLoc(), c0, outputExtents[i], c1, ValueRange({}));
        Block *body = loop.getBody();
        inductionVariables.push_back(body->getArgument(0));
        // Leave the insertion point at the beginning of the body.
        rewriter.setInsertionPointToStart(body);
      }

      // Create the inner loop body.
      // When reading from the input, clamp any indices for dimensions that are
      // being broadcast.
      SmallVector<Value, 6> inputIndices;
      for (int i = 0, e = inputType.getRank(); i < e; i++) {
        auto c0 = rewriter.create<ConstantIndexOp>(op.getLoc(), 0);
        auto select = rewriter.create<SelectOp>(
            op.getLoc(), inputDimRequiresBroadcasting[i], c0,
            inductionVariables[rankDiff + i]);
        inputIndices.push_back(select);
      }
      Value load = rewriter.create<memref::LoadOp>(op.getLoc(), inputMemref,
                                                   inputIndices);
      rewriter.create<memref::StoreOp>(op.getLoc(), load, resultMemref,
                                       inductionVariables);
    }
    rewriter.replaceOp(op, resultMemref);
    return success();
  }
};
} // namespace

namespace {
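// Bufferizes tcp.splatted by allocating the result buffer and initializing
// every element to the splat value with a linalg.fill.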
class BufferizeSplattedOp : public OpConversionPattern<tcp::SplattedOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(tcp::SplattedOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto resultsOrFailure = allocateResults(op, rewriter, op.getLoc());
    if (failed(resultsOrFailure))
      return failure();
    auto results = *resultsOrFailure;
    rewriter.create<linalg::FillOp>(op.getLoc(), op.splatVal(), results[0]);
    rewriter.replaceOp(op, results);
    return success();
  }
};
} // namespace

namespace {
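// Bufferizes tcp.pad by allocating the expanded result buffer, filling it
// with the fill value, and then copying the operand into the interior
// subview that is left unpadded.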
class BufferizePadOp : public OpConversionPattern<tcp::PadOp> {
public:
  using OpConversionPattern::OpConversionPattern;
  LogicalResult
  matchAndRewrite(tcp::PadOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto resultsOrFailure = allocateResults(op, rewriter, op.getLoc());
    if (failed(resultsOrFailure))
      return failure();
    auto results = *resultsOrFailure;
    auto c1 = rewriter.create<ConstantOp>(
        op.getLoc(), rewriter.getIntegerAttr(rewriter.getIndexType(), 1));
    SmallVector<Value, 6> offsets, sizes, strides;
    auto resultType = op.getType().cast<RankedTensorType>();
    for (int i = 0, e = resultType.getRank(); i < e; i++) {
      auto dimIndex = rewriter.create<ConstantIndexOp>(op.getLoc(), i);
      auto offset =
          rewriter.create<tensor::ExtractOp>(op.getLoc(), op.lowerExpansion(),
                                             ValueRange({dimIndex}));
      auto size = rewriter.create<memref::DimOp>(op.getLoc(), op.operand(), i);
      auto stride = c1;
      offsets.push_back(offset);
      sizes.push_back(size);
      strides.push_back(stride);
    }
    rewriter.create<linalg::FillOp>(op.getLoc(), op.fillVal(), results[0]);
    auto unpadded = rewriter.create<memref::SubViewOp>(
        op.getLoc(), results[0], ValueRange(offsets), ValueRange(sizes),
        ValueRange(strides));
    auto inputMemref = operands[0];
    rewriter.create<linalg::CopyOp>(op.getLoc(), inputMemref, unpadded);
    rewriter.replaceOp(op, results);
    return success();
  }
};
} // namespace

namespace {
class TCPBufferizePass : public TCPBufferizeBase<TCPBufferizePass> {
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<refback::RefbackDialect>();
    registry.insert<memref::MemRefDialect>();
    registry.insert<linalg::LinalgDialect>();
    registry.insert<scf::SCFDialect>();
  }

  void runOnOperation() override {
    auto func = getOperation();
    auto *context = &getContext();

    BufferizeTypeConverter typeConverter;

    RewritePatternSet patterns(context);

    ConversionTarget target(*context);

    // All lowering to buffers involves refback.alloc_memref ops.
    // TODO: This makes the tests cleaner, but otherwise isn't too essential as
    // we can just open-code the extents for the alloc.
    target.addLegalOp<refback::AllocMemRefOp>();

    patterns.add<LowerBroadcastToToLoopsPattern>(typeConverter, context);
    target.addIllegalOp<tcp::BroadcastToOp>();
    patterns.add<BufferizeSplattedOp>(typeConverter, context);
    target.addIllegalOp<tcp::SplattedOp>();
    patterns.add<BufferizePadOp>(typeConverter, context);
    target.addIllegalOp<tcp::PadOp>();

    target.addLegalDialect<linalg::LinalgDialect>();
    target.addLegalDialect<StandardOpsDialect>();
    target.addLegalDialect<scf::SCFDialect>();
    target.addLegalDialect<tensor::TensorDialect>();
    target.addLegalDialect<memref::MemRefDialect>();
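
    // Partial conversion: only the TCP ops explicitly marked illegal above
    // must be rewritten; all other ops are left untouched.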
    if (failed(applyPartialConversion(func, target, std::move(patterns))))
      return signalPassFailure();
  }
};
} // namespace

std::unique_ptr<OperationPass<FuncOp>> mlir::NPCOMP::createTCPBufferizePass() {
  return std::make_unique<TCPBufferizePass>();
}