From d3fd754b93ea10e6f3a1cc46bbb471d1a1ff287a Mon Sep 17 00:00:00 2001
From: Rob Suderman
Date: Mon, 29 Jan 2024 09:40:21 -0800
Subject: [PATCH] [onnx] `onnx.MatMulInteger` lowering to `torch.mm` and `quint*` types (#2761)

Torch does not have an equivalent matmul operation for integers. Instead
it sidechannels the information via its quantized types. For this
lowering we setup these sidechannels then invoke `torch.mm`.
---
Reviewer notes:

What the C++ hunk does, step by step:
  * Binds operand 0 / operand 1 as lhs / rhs and binds the result type;
    the pattern fails if any of these cannot be bound.
  * Operands 2 and 3 are the lhs / rhs zero points. When binding one of
    them fails, a constant integer 0 is used in its place.
  * A zero point that is a tensor must have every dimension equal to 1
    (otherwise the pattern fails); it is then turned into a scalar with
    `torch.aten.item`.
  * The scale is the float constant 1.0 for both operands.
  * The local helper `q` maps the operand dtype to a quantized dtype:
    si8 -> qint8, ui8 -> quint8, si32 -> qint32. Any other dtype yields
    a null Type.
  * Both operands are wrapped with
    `torch.aten._make_per_tensor_quantized_tensor` and multiplied with
    `torch.aten.mm`.

NOTE(review): the null Type returned by `q` for an unsupported dtype is
passed straight into the quantized tensor type without a check. It looks
like the pattern should fail in that case instead -- please confirm.

NOTE(review): the line structure and the C++ template arguments of this
patch were reconstructed. The arguments follow the FileCheck lines in
the test hunk, except `Torch::ConstantIntOp`, `Torch::QInt8Type` and
`Torch::QInt32Type`, which the test does not exercise and which are
inferred by analogy. Indentation is a best-effort reconstruction; verify
against the commit before applying. The author's e-mail address on the
`From:` line could not be recovered.

 .../TorchOnnxToTorch/DefaultDomainGtoP.cpp    | 69 +++++++++++++++++++
 .../TorchOnnxToTorch/simple_ops_g_to_p.mlir   | 15 ++++
 2 files changed, 84 insertions(+)

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
index 7e3025da3..4ee71af3f 100644
--- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
+++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
@@ -136,6 +136,75 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
                       binder.op, resultType, lhs, rhs);
                   return success();
                 });
+  patterns.onOp(
+      "MatMulInteger", 10,
+      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
+        Torch::ValueTensorType resultType;
+        Value lhs, rhs, lhsZp, rhsZp;
+        if (binder.tensorOperandAtIndex(lhs, 0) ||
+            binder.tensorOperandAtIndex(rhs, 1) ||
+            binder.tensorResultType(resultType))
+          return failure();
+
+        if (binder.tensorOperandAtIndex(lhsZp, 2)) {
+          lhsZp = rewriter.create<Torch::ConstantIntOp>(
+              binder.getLoc(), rewriter.getType<Torch::IntType>(),
+              rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
+        }
+
+        if (binder.tensorOperandAtIndex(rhsZp, 3)) {
+          rhsZp = rewriter.create<Torch::ConstantIntOp>(
+              binder.getLoc(), rewriter.getType<Torch::IntType>(),
+              rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
+        }
+
+        auto lhsTy = dyn_cast<Torch::ValueTensorType>(lhs.getType());
+        auto rhsTy = dyn_cast<Torch::ValueTensorType>(rhs.getType());
+
+        if (auto zpTy = dyn_cast<Torch::ValueTensorType>(lhsZp.getType())) {
+          for (auto dim : zpTy.getSizes())
+            if (dim != 1)
+              return failure();
+          lhsZp = rewriter.create<Torch::AtenItemOp>(
+              binder.getLoc(), rewriter.getType<Torch::IntType>(), lhsZp);
+        }
+
+        if (auto zpTy = dyn_cast<Torch::ValueTensorType>(rhsZp.getType())) {
+          for (auto dim : zpTy.getSizes())
+            if (dim != 1)
+              return failure();
+          rhsZp = rewriter.create<Torch::AtenItemOp>(
+              binder.getLoc(), rewriter.getType<Torch::IntType>(), rhsZp);
+        }
+
+        Value scale = rewriter.create<Torch::ConstantFloatOp>(
+            binder.getLoc(), rewriter.getType<Torch::FloatType>(),
+            rewriter.getF64FloatAttr(1.0));
+
+        auto q = [&](Type qty) -> Type {
+          if (qty.isSignedInteger(8))
+            return rewriter.getType<Torch::QInt8Type>();
+          if (qty.isUnsignedInteger(8))
+            return rewriter.getType<Torch::QUInt8Type>();
+          if (qty.isSignedInteger(32))
+            return rewriter.getType<Torch::QInt32Type>();
+          return {};
+        };
+
+        Type lhsQTy = rewriter.getType<Torch::ValueTensorType>(
+            lhsTy.getOptionalSizes(), q(lhsTy.getDtype()));
+        Type rhsQTy = rewriter.getType<Torch::ValueTensorType>(
+            rhsTy.getOptionalSizes(), q(rhsTy.getDtype()));
+
+        lhs = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
+            binder.getLoc(), lhsQTy, lhs, scale, lhsZp);
+        rhs = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
+            binder.getLoc(), rhsQTy, rhs, scale, rhsZp);
+
+        rewriter.replaceOpWithNewOp<Torch::AtenMmOp>(binder.op, resultType, lhs,
+                                                     rhs);
+        return success();
+      });
   patterns.onOp("Mul", 7,
                 [](OpBinder binder, ConversionPatternRewriter &rewriter) {
                   Torch::ValueTensorType resultType;
diff --git a/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir b/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
index 6a420300c..449b7e4fe 100644
--- a/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
+++ b/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
@@ -222,6 +222,21 @@ func.func @test_matmul_4d(%arg0: !torch.vtensor<[1,2,3,4],f32>, %arg1: !torch.vt
 
 // -----
 
+// CHECK-LABEL: @test_matmulinteger
+func.func @test_matmulinteger(%arg0: !torch.vtensor<[4,3],ui8>, %arg1: !torch.vtensor<[3,2],ui8>, %arg2: !torch.vtensor<[1],ui8>, %arg3: !torch.vtensor<[1],ui8>) -> !torch.vtensor<[4,2],si32> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
+  %0 = torch.operator "onnx.MatMulInteger"(%arg0, %arg1, %arg2, %arg3) : (!torch.vtensor<[4,3],ui8>, !torch.vtensor<[3,2],ui8>, !torch.vtensor<[1],ui8>, !torch.vtensor<[1],ui8>) -> !torch.vtensor<[4,2],si32>
+  // CHECK: %[[LITEM:.+]] = torch.aten.item %arg2
+  // CHECK: %[[RITEM:.+]] = torch.aten.item %arg3
+  // CHECK: %[[SCALE:.+]] = torch.constant.float 1.000000e+00
+  // CHECK: %[[LMAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[LITEM]] : !torch.vtensor<[4,3],ui8>, !torch.float, !torch.int -> !torch.vtensor<[4,3],!torch.quint8>
+  // CHECK: %[[RMAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg1, %[[SCALE]], %[[RITEM]] : !torch.vtensor<[3,2],ui8>, !torch.float, !torch.int -> !torch.vtensor<[3,2],!torch.quint8>
+  // CHECK: %[[MM:.+]] = torch.aten.mm %[[LMAKE]], %[[RMAKE]]
+  // CHECK: return %[[MM]]
+  return %0 : !torch.vtensor<[4,2],si32>
+}
+
+// -----
+
 // CHECK-LABEL: func.func @test_mul
 func.func @test_mul(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
   // CHECK: torch.aten.mul.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[3,4,5],f32> -> !torch.vtensor<[3,4,5],f32>