mirror of https://github.com/llvm/torch-mlir
Expands Q Commuting Ops (#3332)
After running the model tests in SHARK-TestSuite, I noticed a few model failures due to half-fusion. Notably, RDN_pytorch_vaiq_int8 had a depth=5 convolution chain with multiple AtenViewOps.
pull/3335/head
parent
20d4d16d32
commit
911e723581
|
@ -39,7 +39,8 @@ template <> struct QuantInfo<AtenReluOp> {
|
|||
bool isQCommutingOp(mlir::Operation *op) {
|
||||
// if adding a new commuting op here, be sure to add a
|
||||
// RemoveUnused pattern for that op to clean up afterwards
|
||||
return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp>(op);
|
||||
return llvm::isa<AtenTransposeIntOp, AtenReshapeOp, AtenSliceTensorOp,
|
||||
PrimsCollapseOp, AtenViewOp>(op);
|
||||
}
|
||||
|
||||
// The following conversion takes patterns of the form [op0 -> MPTQT -> dequant
|
||||
|
@ -372,11 +373,12 @@ public:
|
|||
RemoveUnused<AtenQuantizePerTensorOp>,
|
||||
RemoveUnused<Aten_MakePerTensorQuantizedTensorOp>,
|
||||
RemoveUnused<AtenTransposeIntOp>, RemoveUnused<AtenSliceTensorOp>,
|
||||
RemoveUnused<AtenReshapeOp>,
|
||||
QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 0>,
|
||||
RemoveUnused<AtenReshapeOp>, RemoveUnused<PrimsCollapseOp>,
|
||||
RemoveUnused<AtenViewOp>,
|
||||
QuantizeOperandsPastCommutingOps<AtenConvolutionOp, 5>,
|
||||
QuantizeOperandsPastCommutingOps<AtenReluOp, 0>,
|
||||
QuantizeOperandsPastCommutingOps<AtenMatmulOp, 2>,
|
||||
QuantizeOperandsPastCommutingOps<AtenMmOp, 1>,
|
||||
QuantizeOperandsPastCommutingOps<AtenMmOp, 2>,
|
||||
QuantizeAccumulator<AtenMmOp>, QuantizeAccumulator<AtenMatmulOp>,
|
||||
QuantizeResultLikeOperand<AtenReluOp>, QuantizeBias<AtenConvolutionOp>>(
|
||||
context);
|
||||
|
|
Loading…
Reference in New Issue