// torch-mlir/lib/Dialect/Torch/Transforms/MatchQuantizedOps.cpp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Also available under a BSD-style license. See LICENSE.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"

#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"
#include "torch-mlir/Dialect/Torch/Utils/Utils.h"

using namespace mlir;
using namespace mlir::torch;
using namespace mlir::torch::Torch;

namespace {

/// Maps an integer element type to the Torch quantized element type used by
/// this file.
///
/// \param context Context in which the quantized type is created.
/// \param t       Element type to translate.
/// \returns `QUInt8Type` for a signless 8-bit integer, `QInt8Type` for every
///          other 8-bit integer, `QInt32Type` for a 32-bit integer, and a
///          null `Type` for anything else.
///
/// NOTE(review): `isInteger(8)` does not look at signedness, so an unsigned
/// 8-bit type (`ui8`) ends up in the `QInt8Type` branch — confirm this is the
/// intended mapping.
Type getQuantizedType(MLIRContext *context, Type t) {
  // All 8-bit integers are handled together; only the signless flavour is
  // treated as unsigned-quantized.
  if (t.isInteger(8)) {
    if (t.isSignlessInteger(8))
      return Torch::QUInt8Type::get(context);
    return Torch::QInt8Type::get(context);
  }

  if (t.isInteger(32))
    return Torch::QInt32Type::get(context);

  // No quantized counterpart: callers test the result for null.
  return Type();
}

/// Rewrites `torch.operator` ops that carry the decomposed-quantization custom
/// operator names into builtin Torch quantization ops.
///
/// Two operator names are recognized:
///  * `torch.quantized_decomposed.quantize_per_tensor` becomes
///    `AtenQuantizePerTensorOp`, optionally followed by `AtenIntReprOp` when
///    the quantized tensor type differs from the declared result type, and
///    finally `AtenClampOp`.
///  * `torch.quantized_decomposed.dequantize_per_tensor` becomes
///    `AtenClampOp` -> `Aten_MakePerTensorQuantizedTensorOp` ->
///    `AtenDequantizeTensorOp`.
///
/// Every other operator is reported as a match failure. All validation is
/// performed before the first op is created so that a failed match never
/// leaves partially built IR behind.
class MatchQuantizeOperator : public OpRewritePattern<OperatorOp> {
public:
  using OpRewritePattern::OpRewritePattern;

  /// Dispatches on the operator name; returns failure for unrelated names.
  LogicalResult matchAndRewrite(OperatorOp op,
                                PatternRewriter &rewriter) const override {
    const auto opName = op.getName();
    if (opName == "torch.quantized_decomposed.quantize_per_tensor")
      return rewriteQuantize(op, rewriter);
    if (opName == "torch.quantized_decomposed.dequantize_per_tensor")
      return rewriteDequantize(op, rewriter);
    return failure();
  }

private:
  /// Lowers the decomposed `quantize_per_tensor` operator.
  LogicalResult rewriteQuantize(OperatorOp op,
                                PatternRewriter &rewriter) const {
    // Operands 0..5 and result 0 are read below; reject malformed ops instead
    // of indexing out of range.
    if (op->getNumOperands() < 6 || op->getNumResults() < 1)
      return rewriter.notifyMatchFailure(
          op, "quantization expects at least 6 operands and 1 result");

    auto resultTy = cast<ValueTensorType>(op.getType(0));
    // Same guard the dequantize path applies before reading the dtype.
    if (!resultTy.hasDtype())
      return rewriter.notifyMatchFailure(op,
                                         "quantization has unknown dtype");

    // Fall back to the plain dtype when there is no quantized counterpart.
    Type qeTy = getQuantizedType(rewriter.getContext(), resultTy.getDtype());
    if (!qeTy)
      qeTy = resultTy.getDtype();

    auto qTy =
        rewriter.getType<ValueTensorType>(resultTy.getOptionalSizes(), qeTy);
    Value quant = rewriter.create<AtenQuantizePerTensorOp>(
        op.getLoc(), qTy,
        /*self=*/op.getOperand(0), /*scale=*/op.getOperand(1),
        /*zero_point=*/op.getOperand(2), /*dtype=*/op.getOperand(5));

    // The custom op yields the integer representation, so convert back when
    // the quantized tensor type is not already the declared result type.
    if (qTy != resultTy) {
      quant = rewriter.create<AtenIntReprOp>(op.getLoc(), resultTy, quant);
    }

    // Operands 3 and 4 are forwarded as the clamp bounds.
    rewriter.replaceOpWithNewOp<AtenClampOp>(
        op, resultTy, quant, op.getOperand(3), op.getOperand(4));
    return success();
  }

  /// Lowers the decomposed `dequantize_per_tensor` operator.
  LogicalResult rewriteDequantize(OperatorOp op,
                                  PatternRewriter &rewriter) const {
    // Operands 0..4 are read below.
    if (op->getNumOperands() < 5)
      return rewriter.notifyMatchFailure(
          op, "dequantization expects at least 5 operands");

    // The clamp result has the same type as the input, so every check that
    // depends on that type can run before the clamp is created.
    Value input = op.getOperand(0);
    auto inputTy = cast<Torch::ValueTensorType>(input.getType());
    if (!inputTy.hasDtype())
      return rewriter.notifyMatchFailure(op,
                                         "dequantization has unknown dtype");

    Type qetype = getQuantizedType(op.getContext(), inputTy.getDtype());
    if (!qetype)
      return rewriter.notifyMatchFailure(op,
                                         "dequantization has unknown qtype");

    // From here on the rewrite cannot fail, so it is safe to build new ops.
    auto clamp = rewriter.create<AtenClampOp>(
        op.getLoc(), input.getType(), input, op.getOperand(3),
        op.getOperand(4));

    Type qTy = Torch::ValueTensorType::get(
        op.getContext(), inputTy.getOptionalSizes(), qetype);
    auto quant = rewriter.create<Aten_MakePerTensorQuantizedTensorOp>(
        op.getLoc(), qTy, clamp, op.getOperand(1), op.getOperand(2));
    rewriter.replaceOpWithNewOp<AtenDequantizeTensorOp>(
        op, op.getResultTypes(), quant);
    return success();
  }
};

/// Pass that greedily applies `MatchQuantizeOperator` to the operation it runs
/// on and signals pass failure when the greedy driver reports failure.
class MatchQuantizedCustomOpsPass
    : public MatchQuantizedCustomOpsBase<MatchQuantizedCustomOpsPass> {
public:
  void runOnOperation() override {
    MLIRContext *ctx = &getContext();

    // The pattern set holds the single custom-op matching pattern.
    RewritePatternSet quantPatterns(ctx);
    quantPatterns.insert<MatchQuantizeOperator>(ctx);

    // Run the greedy driver with a default-constructed configuration.
    GreedyRewriteConfig driverConfig;
    const auto status = applyPatternsAndFoldGreedily(
        getOperation(), std::move(quantPatterns), driverConfig);
    if (failed(status))
      signalPassFailure();
  }
};

} // namespace

/// Factory for the pass that matches quantized custom operators; returns a
/// freshly allocated `MatchQuantizedCustomOpsPass` owned by the caller.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::torch::Torch::createMatchQuantizedCustomOpsPass() {
  std::unique_ptr<OperationPass<func::FuncOp>> pass =
      std::make_unique<MatchQuantizedCustomOpsPass>();
  return pass;
}
[torch][quant] Quantized `torch.mm` for linalg with end-to-end test (#2750) This includes custom op matching for decomposed operations and fusing dequantization into dense operations. As a validation we compare to the dequant+mm torch implementation. 2024-01-25 06:02:50 +08:00			`//===----------------------------------------------------------------------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`// Also available under a BSD-style license. See LICENSE.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "PassDetail.h"`

			`#include "mlir/Transforms/GreedyPatternRewriteDriver.h"`
			`#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"`
			`#include "torch-mlir/Dialect/Torch/Transforms/Passes.h"`
			`#include "torch-mlir/Dialect/Torch/Utils/Utils.h"`

			`using namespace mlir;`
			`using namespace mlir::torch;`
			`using namespace mlir::torch::Torch;`

			`namespace {`

			`Type getQuantizedType(MLIRContext *context, Type t) {`
			`if (t.isSignlessInteger(8))`
			`return Torch::QUInt8Type::get(context);`
			`if (t.isInteger(8) \|\| t.isSignedInteger(8))`
			`return Torch::QInt8Type::get(context);`
			`if (t.isInteger(32))`
			`return Torch::QInt32Type::get(context);`
			`return {};`
			`}`

			`class MatchQuantizeOperator : public OpRewritePattern<OperatorOp> {`
			`public:`
			`using OpRewritePattern::OpRewritePattern;`
			`LogicalResult matchAndRewrite(OperatorOp op,`
			`PatternRewriter &rewriter) const override {`
			`if (op.getName() == "torch.quantized_decomposed.quantize_per_tensor") {`
			`auto resultTy = cast<ValueTensorType>(op.getType(0));`
			`auto qeTy = getQuantizedType(rewriter.getContext(), resultTy.getDtype());`
			`if (!qeTy)`
			`qeTy = resultTy.getDtype();`

			`auto qTy =`
			`rewriter.getType<ValueTensorType>(resultTy.getOptionalSizes(), qeTy);`
			`Value quant = rewriter.create<AtenQuantizePerTensorOp>(`
			`op.getLoc(), qTy,`
			`/self=/op.getOperand(0), /scale=/op.getOperand(1),`
			`/zero_point=/op.getOperand(2), /dtype=/op.getOperand(5));`

			`if (qTy != resultTy) {`
			`quant = rewriter.create<AtenIntReprOp>(op.getLoc(), resultTy, quant);`
			`}`

			`rewriter.replaceOpWithNewOp<AtenClampOp>(`
			`op, resultTy, quant, op.getOperand(3), op.getOperand(4));`
			`return success();`
			`}`

			`if (op.getName() == "torch.quantized_decomposed.dequantize_per_tensor") {`
			`auto clamp = rewriter.create<AtenClampOp>(`
			`op.getLoc(), op.getOperand(0).getType(), op.getOperand(0),`
			`op.getOperand(3), op.getOperand(4));`

Fix deprecated uses of cast/dyn_cast/dyn_cast_or_null/isa (#3243) Like #3130, gradually replace the deprecated code https://github.com/llvm/mlir-www/blob/main/website/content/deprecation/_index.md#deprecated 2024-04-28 05:00:56 +08:00			`auto clampTy = cast<Torch::ValueTensorType>(clamp.getType());`
[torch][quant] Quantized `torch.mm` for linalg with end-to-end test (#2750) This includes custom op matching for decomposed operations and fusing dequantization into dense operations. As a validation we compare to the dequant+mm torch implementation. 2024-01-25 06:02:50 +08:00			`if (!clampTy.hasDtype())`
			`return rewriter.notifyMatchFailure(op,`
			`"dequantization has unknown dtype");`

			`Type dtype = clampTy.getDtype();`
			`Type qetype = getQuantizedType(op.getContext(), dtype);`
			`if (!qetype)`
			`return rewriter.notifyMatchFailure(op,`
			`"dequantization has unknown qtype");`

			`Type qTy = Torch::ValueTensorType::get(`
			`op.getContext(), clampTy.getOptionalSizes(), qetype);`
			`auto quant = rewriter.create<Aten_MakePerTensorQuantizedTensorOp>(`
			`op.getLoc(), qTy, clamp, op.getOperand(1), op.getOperand(2));`
			`rewriter.replaceOpWithNewOp<AtenDequantizeTensorOp>(`
			`op, op.getResultTypes(), quant);`
			`return success();`
			`}`

			`return failure();`
			`}`
			`};`

			`class MatchQuantizedCustomOpsPass`
			`: public MatchQuantizedCustomOpsBase<MatchQuantizedCustomOpsPass> {`
			`public:`
			`void runOnOperation() override {`
			`MLIRContext *context = &getContext();`
			`RewritePatternSet patterns(context);`
			`patterns.insert<MatchQuantizeOperator>(context);`

			`GreedyRewriteConfig config;`
			`if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns),`
			`config)))`
			`return signalPassFailure();`
			`}`
			`};`

			`} // namespace`

			`std::unique_ptr<OperationPass<func::FuncOp>>`
			`mlir::torch::Torch::createMatchQuantizedCustomOpsPass() {`
			`return std::make_unique<MatchQuantizedCustomOpsPass>();`
			`}`