Expands Q Commuting Ops (#3332)

After running the model tests in SHARK-TestSuite, I noticed a few model
failures due to half-fusion: quantized op patterns that were only
partially fused by this pass.

Notably, RDN_pytorch_vaiq_int8 had a depth-5 chain of commuting ops
feeding a convolution, including multiple AtenViewOps.

@@ -39,7 +39,8 @@ template <> struct QuantInfo<AtenReluOp> {
 bool isQCommutingOp(mlir::Operation *op) {
   // if adding a new commuting op here, be sure to add a
   // RemoveUnused pattern for that op to clean up afterwards
-  return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp>(op);
+  return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp,
+                   PrimsCollapseOp, AtenViewOp>(op);
 }

 // The following conversion takes patterns of the form [op0 -> MPTQT -> dequant
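
To make the depth parameter concrete: a helper along these lines could walk
an operand's defining ops back through at most maxDepth commuting ops to the
dequantize op that heads the chain. This is a minimal sketch, not the actual
torch-mlir implementation; findDequantSource is a hypothetical name, and it
assumes the generated AtenDequantizeSelfOp / AtenDequantizeTensorOp classes
from the Torch dialect.

#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "torch-mlir/Dialect/Torch/IR/TorchOps.h"

// Hypothetical helper, for illustration only: starting from `operand`,
// follow defining ops through at most `maxDepth` commuting ops, looking
// for the dequantize op heading the [MPTQT -> dequant -> ...] chain.
static mlir::Operation *findDequantSource(mlir::Value operand,
                                          unsigned maxDepth) {
  using namespace mlir::torch::Torch;
  mlir::Operation *def = operand.getDefiningOp();
  for (unsigned i = 0; i <= maxDepth && def; ++i) {
    if (llvm::isa<AtenDequantizeSelfOp, AtenDequantizeTensorOp>(def))
      return def; // found the dequantized source within the depth bound
    if (!isQCommutingOp(def))
      return nullptr; // a non-commuting op breaks the chain
    // Each commuting op above takes the tensor being propagated as its
    // first operand, so keep walking through operand 0.
    def = def->getOperand(0).getDefiningOp();
  }
  return nullptr; // chain is deeper than maxDepth; fusion stops short
}

With maxDepth = 0 (the previous convolution bound), the dequantize op must
directly define the operand, which is why the depth-5 AtenViewOp chain in
RDN_pytorch_vaiq_int8 failed to fuse.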

@@ -372,11 +373,12 @@ public:
         RemoveUnused<AtenQuantizePerTensorOp>,
         RemoveUnused<Aten_MakePerTensorQuantizedTensorOp>,
         RemoveUnused<AtenTransposeIntOp>, RemoveUnused<AtenSliceTensorOp>,
-        RemoveUnused<AtenReshapeOp>,
-        QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 0>,
+        RemoveUnused<AtenReshapeOp>, RemoveUnused<PrimsCollapseOp>,
+        RemoveUnused<AtenViewOp>,
+        QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 5>,
         QuantizeOperandsPastCommutingOps<AtenReluOp, 0>,
         QuantizeOperandsPastCommutingOps<AtenMatmulOp, 2>,
-        QuantizeOperandsPastCommutingOps<AtenMmOp, 1>,
+        QuantizeOperandsPastCommutingOps<AtenMmOp, 2>,
         QuantizeAccumulator<AtenMmOp>, QuantizeAccumulator<AtenMatmulOp>,
         QuantizeResultLikeOperand<AtenReluOp>, QuantizeBias<AtenConvolutionOp>>(
         context);
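
The net effect: the convolution pattern can now look through up to five
commuting ops (enough for the RDN_pytorch_vaiq_int8 chain above) instead of
none, AtenMmOp's bound is raised from 1 to 2, and the two new RemoveUnused
patterns follow the rule in isQCommutingOp's comment that every newly
commuting op needs a matching cleanup pattern.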