Expands Q Commuting Ops (#3332)

After running the model tests in SHARK-TestSuite, I noticed a few model
failures due to half-fusion: quantized op patterns that were only
partially fused by this pass.

Notably, RDN_pytorch_vaiq_int8 had a depth-5 chain of commuting ops
feeding a convolution, including multiple AtenViewOps.

@@ -39,7 +39,8 @@ template <> struct QuantInfo<AtenReluOp> {
 bool isQCommutingOp(mlir::Operation *op) {
   // if adding a new commuting op here, be sure to add a
   // RemoveUnused pattern for that op to clean up afterwards
-  return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp>(op);
+  return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp,
+                   PrimsCollapseOp, AtenViewOp>(op);
 }

 // The following conversion takes patterns of the form [op0 -> MPTQT -> dequant
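
To make the depth parameter concrete: a helper along these lines could walk
an operand's defining ops back through at most maxDepth commuting ops to the
dequantize op that heads the chain. This is a minimal sketch, not the actual
torch-mlir implementation; findDequantSource is a hypothetical name, and it
assumes the generated AtenDequantizeSelfOp / AtenDequantizeTensorOp classes
from the Torch dialect.

#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"

// Hypothetical helper, for illustration only: starting from `operand`,
// follow defining ops through at most `maxDepth` commuting ops, looking
// for the dequantize op heading the [MPTQT -> dequant -> ...] chain.
static mlir::Operation *findDequantSource(mlir::Value operand,
                                          unsigned maxDepth) {
  using namespace mlir::torch::Torch;
  mlir::Operation *def = operand.getDefiningOp();
  for (unsigned i = 0; i <= maxDepth && def; ++i) {
    if (llvm::isa<AtenDequantizeSelfOp, AtenDequantizeTensorOp>(def))
      return def; // found the dequantized source within the depth bound
    if (!isQCommutingOp(def))
      return nullptr; // a non-commuting op breaks the chain
    // Each commuting op above takes the tensor being propagated as its
    // first operand, so keep walking through operand 0.
    def = def->getOperand(0).getDefiningOp();
  }
  return nullptr; // chain is deeper than maxDepth; fusion stops short
}

With maxDepth = 0 (the previous convolution bound), the dequantize op must
directly define the operand, which is why the depth-5 AtenViewOp chain in
RDN_pytorch_vaiq_int8 failed to fuse.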

@@ -372,11 +373,12 @@ public:
         RemoveUnused<AtenQuantizePerTensorOp>,
         RemoveUnused<Aten_MakePerTensorQuantizedTensorOp>,
         RemoveUnused<AtenTransposeIntOp>, RemoveUnused<AtenSliceTensorOp>,
-        RemoveUnused<AtenReshapeOp>,
-        QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 0>,
+        RemoveUnused<AtenReshapeOp>, RemoveUnused<PrimsCollapseOp>,
+        RemoveUnused<AtenViewOp>,
+        QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 5>,
         QuantizeOperandsPastCommutingOps<AtenReluOp, 0>,
         QuantizeOperandsPastCommutingOps<AtenMatmulOp, 2>,
-        QuantizeOperandsPastCommutingOps<AtenMmOp, 1>,
+        QuantizeOperandsPastCommutingOps<AtenMmOp, 2>,
         QuantizeAccumulator<AtenMmOp>, QuantizeAccumulator<AtenMatmulOp>,
         QuantizeResultLikeOperand<AtenReluOp>, QuantizeBias<AtenConvolutionOp>>(
         context);
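
The net effect: the convolution pattern can now look through up to five
commuting ops (enough for the RDN_pytorch_vaiq_int8 chain above) instead of
none, AtenMmOp's bound is raised from 1 to 2, and the two new RemoveUnused
patterns follow the rule in isQCommutingOp's comment that every newly
commuting op needs a matching cleanup pattern.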