From b5387c0f29e6cce9e1586b92df4a975ad539baa4 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 18 Jan 2024 16:47:21 -0800 Subject: [PATCH] [onnx] Lowering `onnx.dequantize_linear` to `torch` (#2759) We can lower the per-tensor version of the operation to the dequantize operation by marking the input with the make quantized tensor component. This introduces the `qint*` and `quint*` tensor types, which can be lowered to the appropriate dequantization behavior during the torch-to-linalg conversion. --- .../TorchOnnxToTorch/DefaultDomainAtoF.cpp | 53 +++++++++++++++++++ .../TorchOnnxToTorch/simple_ops_a_to_f.mlir | 44 ++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp index aa3b5fc01..c18d68105 100644 --- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp +++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp @@ -1156,6 +1156,59 @@ void mlir::torch::onnx_c::populateDefaultDomainAtoF( binder.op, resultType, transposedInput, reshapeSizesList); return success(); }); + patterns.onOp( + "DequantizeLinear", 1, + [](OpBinder binder, ConversionPatternRewriter &rewriter) { + Torch::ValueTensorType resultType; + llvm::SmallVector operands; + if (binder.tensorOperands(operands, 3) || + binder.tensorResultType(resultType)) + return failure(); + + Value operand = operands[0]; + Value scale = operands[1]; + Value zeropoint = operands[2]; + + auto operandTy = operand.getType().cast(); + + auto scaleTy = scale.getType().dyn_cast(); + if (!scaleTy || !scaleTy.hasSizes()) + return rewriter.notifyMatchFailure(binder.op, "requires known rank"); + if (!resultType.hasDtype()) + return rewriter.notifyMatchFailure(binder.op, + "requires known resulty dtype"); + + if (scaleTy.getSizes().size() == 0) { + Type qTy = operandTy.getDtype(); + + if (qTy.isUnsignedInteger(8)) { + qTy = rewriter.getType(); + } else if (qTy.isSignedInteger(8)) { + qTy = 
rewriter.getType(); + } else if (qTy.isSignedInteger(32)) { + qTy = rewriter.getType(); + } else { + return rewriter.notifyMatchFailure(binder.op, + "unsupported result dtype"); + } + + auto qTensorTy = rewriter.getType( + resultType.getOptionalSizes(), qTy); + scale = rewriter.create( + binder.getLoc(), rewriter.getType(), scale); + zeropoint = rewriter.create( + binder.getLoc(), rewriter.getType(), zeropoint); + + auto quantize = + rewriter.create( + binder.getLoc(), qTensorTy, operand, scale, zeropoint); + rewriter.replaceOpWithNewOp( + binder.op, resultType, quantize); + return success(); + } + + return failure(); + }); patterns.onOp("Div", 14, [](OpBinder binder, ConversionPatternRewriter &rewriter) { Torch::ValueTensorType resultType; diff --git a/test/Conversion/TorchOnnxToTorch/simple_ops_a_to_f.mlir b/test/Conversion/TorchOnnxToTorch/simple_ops_a_to_f.mlir index f8bc219dc..42a0fe743 100644 --- a/test/Conversion/TorchOnnxToTorch/simple_ops_a_to_f.mlir +++ b/test/Conversion/TorchOnnxToTorch/simple_ops_a_to_f.mlir @@ -1,4 +1,4 @@ -// RUN: torch-mlir-opt <%s -convert-torch-onnx-to-torch | FileCheck %s +// RUN: torch-mlir-opt <%s --split-input-file -convert-torch-onnx-to-torch | FileCheck %s // Generally, the test cases accumulated here come from running the importer // over all included backend tests that involve simple ops with no model // level constants. 
This is a pragmatic choice which lets us have a lot @@ -438,6 +438,48 @@ func.func @test_cos(%arg0: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5 return %0 : !torch.vtensor<[3,4,5],f32> } +// ----- + +// CHECK-LABEL: @test_dequantizelinear_si8 +func.func @test_dequantizelinear_si8(%arg0: !torch.vtensor<[6],si8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} { + %0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si8>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32> + // CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float + // CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si8> -> !torch.int + // CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]] + // CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]] + // CHECK: return %[[DEQ]] + return %0 : !torch.vtensor<[6],f32> +} + +// ----- + +// CHECK-LABEL: @test_dequantizelinear_ui8 +func.func @test_dequantizelinear_ui8(%arg0: !torch.vtensor<[6],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} { + %0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[6],f32> + // CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float + // CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],ui8> -> !torch.int + // CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]] + // CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]] + // CHECK: return %[[DEQ]] + return %0 : !torch.vtensor<[6],f32> +} + +// ----- + +// 
CHECK-LABEL: @test_dequantizelinear_i32 +func.func @test_dequantizelinear_i32(%arg0: !torch.vtensor<[6],si32>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 19 : si64} { + %0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2) : (!torch.vtensor<[6],si32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si32>) -> !torch.vtensor<[6],f32> + // CHECK: %[[SCALE:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float + // CHECK: %[[ZP:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],si32> -> !torch.int + // CHECK: %[[MAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[ZP]] + // CHECK: %[[DEQ:.+]] = torch.aten.dequantize.self %[[MAKE]] + // CHECK: return %[[DEQ]] + return %0 : !torch.vtensor<[6],f32> +} + +// ----- + + // CHECK-LABEL: @test_div_bcast func.func @test_div_bcast(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} { // CHECK: torch.aten.div.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[5],f32> -> !torch.vtensor<[3,4,5],f32>