mirror of https://github.com/llvm/torch-mlir
[onnx] Lowering `onnx.dequantize_linear` to `torch` (#2759)
We can lower the per-tensor version of the operation to the dequantize operation by marking the input with the make-quantized-tensor component. This introduces the `qint*` and `quint*` tensor types, which can be lowered to the appropriate dequantization behavior during the torch-to-linalg conversion.
pull/2775/head
parent
bd11877f6f
commit
b5387c0f29
|
@ -1156,6 +1156,59 @@ void mlir::torch::onnx_c::populateDefaultDomainAtoF(
|
||||||
binder.op, resultType, transposedInput, reshapeSizesList);
|
binder.op, resultType, transposedInput, reshapeSizesList);
|
||||||
return success();
|
return success();
|
||||||
});
|
});
|
||||||
|
// Lowers `onnx.DequantizeLinear` when the scale is a rank-0 tensor (the
// per-tensor case): the integer operand is re-typed as a quantized tensor via
// `aten._make_per_tensor_quantized_tensor`, and that value is then
// dequantized with `aten.dequantize.self`. Every other configuration is
// reported as a match failure with a diagnostic.
patterns.onOp(
    "DequantizeLinear", 1,
    [](OpBinder binder, ConversionPatternRewriter &rewriter) {
      Torch::ValueTensorType resultType;
      llvm::SmallVector<Value> operands;
      // Bind the three operands (data, scale, zero point) and the result
      // type; bail out if the binder rejects either.
      if (binder.tensorOperands(operands, 3) ||
          binder.tensorResultType(resultType))
        return failure();

      Value operand = operands[0];
      Value scale = operands[1];
      Value zeropoint = operands[2];

      // The operand dtype selects the quantized element type below, so it
      // must be known; querying a null dtype would be invalid.
      auto operandTy = operand.getType().dyn_cast<Torch::ValueTensorType>();
      if (!operandTy || !operandTy.hasDtype())
        return rewriter.notifyMatchFailure(binder.op,
                                           "requires known operand dtype");

      auto scaleTy = scale.getType().dyn_cast<Torch::ValueTensorType>();
      if (!scaleTy || !scaleTy.hasSizes())
        return rewriter.notifyMatchFailure(binder.op, "requires known rank");
      if (!resultType.hasDtype())
        return rewriter.notifyMatchFailure(binder.op,
                                           "requires known result dtype");

      // Only a rank-0 scale is handled here.
      if (scaleTy.getSizes().size() != 0)
        return rewriter.notifyMatchFailure(
            binder.op, "unimplemented: only rank-0 scale is supported");

      // Map the integer storage dtype onto the matching torch quantized
      // element type.
      Type qTy = operandTy.getDtype();
      if (qTy.isUnsignedInteger(8)) {
        qTy = rewriter.getType<Torch::QUInt8Type>();
      } else if (qTy.isSignedInteger(8)) {
        qTy = rewriter.getType<Torch::QInt8Type>();
      } else if (qTy.isSignedInteger(32)) {
        qTy = rewriter.getType<Torch::QInt32Type>();
      } else {
        return rewriter.notifyMatchFailure(binder.op,
                                           "unsupported operand dtype");
      }

      // The quantized tensor has the result's sizes and the quantized
      // element type chosen above.
      auto qTensorTy = rewriter.getType<Torch::ValueTensorType>(
          resultType.getOptionalSizes(), qTy);

      // Extract scale and zero point from their tensors as torch scalars.
      scale = rewriter.create<Torch::AtenItemOp>(
          binder.getLoc(), rewriter.getType<Torch::FloatType>(), scale);
      zeropoint = rewriter.create<Torch::AtenItemOp>(
          binder.getLoc(), rewriter.getType<Torch::IntType>(), zeropoint);

      auto quantize =
          rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
              binder.getLoc(), qTensorTy, operand, scale, zeropoint);
      rewriter.replaceOpWithNewOp<Torch::AtenDequantizeSelfOp>(
          binder.op, resultType, quantize);
      return success();
    });
|
||||||
patterns.onOp("Div", 14,
|
patterns.onOp("Div", 14,
|
||||||
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
|
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
|
||||||
Torch::ValueTensorType resultType;
|
Torch::ValueTensorType resultType;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
// RUN: torch-mlir-opt <%s -convert-torch-onnx-to-torch | FileCheck %s
|
// RUN: torch-mlir-opt <%s --split-input-file -convert-torch-onnx-to-torch | FileCheck %s
|
||||||
// Generally, the test cases accumulated here come from running the importer
|
// Generally, the test cases accumulated here come from running the importer
|
||||||
// over all included backend tests that involve simple ops with no model
|
// over all included backend tests that involve simple ops with no model
|
||||||
// level constants. This is a pragmatic choice which lets us have a lot
|
// level constants. This is a pragmatic choice which lets us have a lot
|
||||||
|
@ -438,6 +438,48 @@ func.func @test_cos(%arg0: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5
|
||||||
return %0 : !torch.vtensor<[3,4,5],f32>
|
return %0 : !torch.vtensor<[3,4,5],f32>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
// DequantizeLinear on an si8 input with rank-0 scale and zero point: expects
// both to be read with torch.aten.item, then fed to
// _make_per_tensor_quantized_tensor followed by dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_si8
|
||||||
|
func.func @test_dequantizelinear_si8(%arg0: !torch.vtensor<[6],si8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
|
||||||
|
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si8>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32>
|
||||||
|
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
|
||||||
|
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si8> -> !torch.int
|
||||||
|
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
|
||||||
|
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
|
||||||
|
// CHECK: return %[[DEQ]]
|
||||||
|
return %0 : !torch.vtensor<[6],f32>
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
// DequantizeLinear on a ui8 input with rank-0 scale and zero point: expects
// both to be read with torch.aten.item, then fed to
// _make_per_tensor_quantized_tensor followed by dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_ui8
|
||||||
|
func.func @test_dequantizelinear_ui8(%arg0: !torch.vtensor<[6],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
|
||||||
|
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32>
|
||||||
|
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
|
||||||
|
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],ui8> -> !torch.int
|
||||||
|
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
|
||||||
|
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
|
||||||
|
// CHECK: return %[[DEQ]]
|
||||||
|
return %0 : !torch.vtensor<[6],f32>
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
// DequantizeLinear on an si32 input with rank-0 scale and zero point: expects
// both to be read with torch.aten.item, then fed to
// _make_per_tensor_quantized_tensor followed by dequantize.self.
// CHECK-LABEL: @test_dequantizelinear_i32
|
||||||
|
func.func @test_dequantizelinear_i32(%arg0: !torch.vtensor<[6],si32>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} {
|
||||||
|
%0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32>
|
||||||
|
// CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float
|
||||||
|
// CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si32> -> !torch.int
|
||||||
|
// CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]]
|
||||||
|
// CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]]
|
||||||
|
// CHECK: return %[[DEQ]]
|
||||||
|
return %0 : !torch.vtensor<[6],f32>
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----
|
||||||
|
|
||||||
|
|
||||||
// CHECK-LABEL: @test_div_bcast
|
// CHECK-LABEL: @test_div_bcast
|
||||||
func.func @test_div_bcast(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
|
func.func @test_div_bcast(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
|
||||||
// CHECK: torch.aten.div.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[5],f32> -> !torch.vtensor<[3,4,5],f32>
|
// CHECK: torch.aten.div.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[5],f32> -> !torch.vtensor<[3,4,5],f32>
|
||||||
|
|
Loading…
Reference in New Issue