mirror of https://github.com/llvm/torch-mlir
[onnx] `onnx.MatMulInteger` lowering to `torch.mm` and `quint*` types (#2761)
Torch does not have an equivalent matmul operation for integers. Instead, it passes the integer information through a side channel: its quantized types. For this lowering we set up these side channels (quantized tensors carrying the zero points) and then invoke `torch.mm`.
pull/2812/head
parent
67cb2e7341
commit
d3fd754b93
|
@ -136,6 +136,75 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
|
|||
binder.op, resultType, lhs, rhs);
|
||||
return success();
|
||||
});
|
||||
// Lowers `onnx.MatMulInteger` (registered for opset version 10) to
// `torch.aten.mm` applied to per-tensor quantized operands.
//
// The integer matmul is expressed through Torch's quantized types: each
// operand is wrapped with `aten._make_per_tensor_quantized_tensor` using a
// scale of 1.0 and its zero point, and the two wrapped values are fed to
// `aten.mm`.
//
// Operands:
//   0, 1 - lhs / rhs tensors (required).
//   2, 3 - lhs / rhs zero points (optional). When binding one of them fails,
//          a constant zero `!torch.int` is used instead. When bound, the zero
//          point must have only size-1 dimensions and is converted to a
//          scalar with `aten.item`.
//
// The pattern fails to match (before creating any op) when:
//   * operand 0, operand 1, or the result type cannot be bound;
//   * an operand is not a `ValueTensorType`, or its dtype is not one of
//     si8, ui8, si32 (the only dtypes mapped to a quantized type here);
//   * a bound zero point is not a `ValueTensorType` with known sizes that are
//     all equal to 1.
patterns.onOp(
    "MatMulInteger", 10,
    [](OpBinder binder, ConversionPatternRewriter &rewriter) {
      Torch::ValueTensorType resultType;
      Value lhs, rhs, lhsZp, rhsZp;
      if (binder.tensorOperandAtIndex(lhs, 0) ||
          binder.tensorOperandAtIndex(rhs, 1) ||
          binder.tensorResultType(resultType))
        return failure();

      auto lhsTy = dyn_cast<Torch::ValueTensorType>(lhs.getType());
      auto rhsTy = dyn_cast<Torch::ValueTensorType>(rhs.getType());
      if (!lhsTy || !rhsTy)
        return rewriter.notifyMatchFailure(
            binder.op, "expected lhs and rhs to be value tensors");

      // Maps an integer element type to the matching Torch quantized type.
      // Returns a null Type for a missing or unsupported dtype.
      auto q = [&](Type qty) -> Type {
        if (!qty)
          return {};
        if (qty.isSignedInteger(8))
          return rewriter.getType<Torch::QInt8Type>();
        if (qty.isUnsignedInteger(8))
          return rewriter.getType<Torch::QUInt8Type>();
        if (qty.isSignedInteger(32))
          return rewriter.getType<Torch::QInt32Type>();
        return {};
      };

      // Reject unsupported dtypes up front: a null dtype must never reach the
      // quantized tensor types built below.
      Type lhsQDtype = q(lhsTy.getDtype());
      Type rhsQDtype = q(rhsTy.getDtype());
      if (!lhsQDtype || !rhsQDtype)
        return rewriter.notifyMatchFailure(
            binder.op, "unsupported operand dtype; expected si8, ui8 or si32");

      // The binder returns a falsy result on success, so `!` yields "present".
      const bool hasLhsZp = !binder.tensorOperandAtIndex(lhsZp, 2);
      const bool hasRhsZp = !binder.tensorOperandAtIndex(rhsZp, 3);

      // A bound zero point is reduced to a scalar with `aten.item`, so it must
      // be a value tensor whose (known) dimensions are all 1. A rank-0 tensor
      // qualifies as well.
      auto isSingleElementTensor = [](Type ty) -> bool {
        auto tensorTy = dyn_cast<Torch::ValueTensorType>(ty);
        if (!tensorTy)
          return false;
        auto sizes = tensorTy.getOptionalSizes();
        if (!sizes)
          return false;
        return llvm::all_of(*sizes, [](int64_t dim) { return dim == 1; });
      };

      if (hasLhsZp && !isSingleElementTensor(lhsZp.getType()))
        return rewriter.notifyMatchFailure(
            binder.op, "lhs zero point must be a single-element tensor");
      if (hasRhsZp && !isSingleElementTensor(rhsZp.getType()))
        return rewriter.notifyMatchFailure(
            binder.op, "rhs zero point must be a single-element tensor");

      // All validation is done; from here on the rewrite only creates ops.
      // Creation order is kept stable: default zero points first, then the
      // `aten.item` extractions, then the scale, the quantized wrappers and
      // finally the matmul.
      auto loc = binder.getLoc();
      auto intTy = rewriter.getType<Torch::IntType>();

      if (!hasLhsZp) {
        lhsZp = rewriter.create<Torch::ConstantIntOp>(
            loc, intTy,
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
      }

      if (!hasRhsZp) {
        rhsZp = rewriter.create<Torch::ConstantIntOp>(
            loc, intTy,
            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
      }

      if (hasLhsZp)
        lhsZp = rewriter.create<Torch::AtenItemOp>(loc, intTy, lhsZp);

      if (hasRhsZp)
        rhsZp = rewriter.create<Torch::AtenItemOp>(loc, intTy, rhsZp);

      // A scale of 1.0 is used for both operands; only the zero points
      // differ between them.
      Value scale = rewriter.create<Torch::ConstantFloatOp>(
          loc, rewriter.getType<Torch::FloatType>(),
          rewriter.getF64FloatAttr(1.0));

      // Same shapes as the inputs, with the dtype swapped for its quantized
      // counterpart.
      Type lhsQTy = rewriter.getType<Torch::ValueTensorType>(
          lhsTy.getOptionalSizes(), lhsQDtype);
      Type rhsQTy = rewriter.getType<Torch::ValueTensorType>(
          rhsTy.getOptionalSizes(), rhsQDtype);

      lhs = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
          loc, lhsQTy, lhs, scale, lhsZp);
      rhs = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
          loc, rhsQTy, rhs, scale, rhsZp);

      rewriter.replaceOpWithNewOp<Torch::AtenMmOp>(binder.op, resultType, lhs,
                                                   rhs);
      return success();
    });
|
||||
patterns.onOp("Mul", 7,
|
||||
[](OpBinder binder, ConversionPatternRewriter &rewriter) {
|
||||
Torch::ValueTensorType resultType;
|
||||
|
|
|
@ -222,6 +222,21 @@ func.func @test_matmul_4d(%arg0: !torch.vtensor<[1,2,3,4],f32>, %arg1: !torch.vt
|
|||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @test_matmulinteger
|
||||
// Exercises the onnx.MatMulInteger lowering with ui8 operands ([4,3] x [3,2])
// and single-element ui8 zero points. The expectations below require that each
// zero point is read with torch.aten.item, that both operands are wrapped as
// !torch.quint8 per-tensor quantized tensors sharing a 1.0 scale, and that the
// wrapped values are multiplied with torch.aten.mm, whose result is returned.
func.func @test_matmulinteger(%arg0: !torch.vtensor<[4,3],ui8>, %arg1: !torch.vtensor<[3,2],ui8>, %arg2: !torch.vtensor<[1],ui8>, %arg3: !torch.vtensor<[1],ui8>) -> !torch.vtensor<[4,2],si32> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
|
||||
%0 = torch.operator "onnx.MatMulInteger"(%arg0, %arg1, %arg2, %arg3) : (!torch.vtensor<[4,3],ui8>, !torch.vtensor<[3,2],ui8>, !torch.vtensor<[1],ui8>, !torch.vtensor<[1],ui8>) -> !torch.vtensor<[4,2],si32>
|
||||
// CHECK: %[[LITEM:.+]] = torch.aten.item %arg2
|
||||
// CHECK: %[[RITEM:.+]] = torch.aten.item %arg3
|
||||
// CHECK: %[[SCALE:.+]] = torch.constant.float 1.000000e+00
|
||||
// CHECK: %[[LMAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[SCALE]], %[[LITEM]] : !torch.vtensor<[4,3],ui8>, !torch.float, !torch.int -> !torch.vtensor<[4,3],!torch.quint8>
|
||||
// CHECK: %[[RMAKE:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg1, %[[SCALE]], %[[RITEM]] : !torch.vtensor<[3,2],ui8>, !torch.float, !torch.int -> !torch.vtensor<[3,2],!torch.quint8>
|
||||
// CHECK: %[[MM:.+]] = torch.aten.mm %[[LMAKE]], %[[RMAKE]]
|
||||
// CHECK: return %[[MM]]
|
||||
return %0 : !torch.vtensor<[4,2],si32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func.func @test_mul
|
||||
func.func @test_mul(%arg0: !torch.vtensor<[3,4,5],f32>, %arg1: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5],f32> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 14 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
|
||||
// CHECK: torch.aten.mul.Tensor %arg0, %arg1 : !torch.vtensor<[3,4,5],f32>, !torch.vtensor<[3,4,5],f32> -> !torch.vtensor<[3,4,5],f32>
|
||||
|
|
Loading…
Reference in New Issue