[MLIR][TORCH] Add E2E support for aten::max_pool2d_with_indices_backward op

This commit adds lowering of `aten::max_pool2d_with_indices_backward` op. This commit also fixes formatting issues in basic.py. Signed-Off By: Vivek Khandelwal <vivek@nod-labs.com>
2022-04-14 17:46:39 +05:30 · 2022-04-14 17:46:39 +05:30 · 1bccb4fc8a
parent 91d3e7ba15
commit 1bccb4fc8a
9 changed files with 680 additions and 30 deletions
--- a/include/torch-mlir/Conversion/Utils/Utils.h
+++ b/include/torch-mlir/Conversion/Utils/Utils.h
@ -77,6 +77,10 @@ SmallVector<Value> getTypeConvertedValues(OpBuilder &b, Location loc,
 Value convertScalarToDtype(OpBuilder &b, Location loc, Value scalar,
                           Type dtype);

+// Return the number of elements of a tensor if the shape is static; otherwise,
+// return -1.
+int64_t getNumberOfElements(RankedTensorType inputType);
+
 } // namespace Torch
 } // namespace torch
 } // namespace mlir
--- a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
+++ b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@ -2938,6 +2938,36 @@ def Torch_AtenMaxPool2dOp : Torch_Op<"aten.max_pool2d", [
  }];
 }

+def Torch_AtenMaxPool2dWithIndicesBackwardOp : Torch_Op<"aten.max_pool2d_with_indices_backward", [
+    AllowsTypeRefinement,
+    HasValueSemantics,
+    ReadOnly
+  ]> {
+  let summary = "Generated op for `aten::max_pool2d_with_indices_backward : (Tensor, Tensor, int[], int[], int[], int[], bool, Tensor) -> (Tensor)`";
+  let arguments = (ins
+    AnyTorchTensorType:$grad_output,
+    AnyTorchTensorType:$self,
+    ListOfTorchIntType:$kernel_size,
+    ListOfTorchIntType:$stride,
+    ListOfTorchIntType:$padding,
+    ListOfTorchIntType:$dilation,
+    Torch_BoolType:$ceil_mode,
+    AnyTorchTensorType:$indices
+  );
+  let results = (outs
+    AnyTorchTensorType:$result
+  );
+  let hasCustomAssemblyFormat = 1;
+  let extraClassDefinition = [{
+    ParseResult AtenMaxPool2dWithIndicesBackwardOp::parse(OpAsmParser &parser, OperationState &result) {
+      return parseDefaultTorchOp(parser, result, 8, 1);
+    }
+    void AtenMaxPool2dWithIndicesBackwardOp::print(OpAsmPrinter &printer) {
+      printDefaultTorchOp(printer, *this, 8, 1);
+    }
+  }];
+}
+
 def Torch_AtenSoftmaxIntOp : Torch_Op<"aten.softmax.int", [
    AllowsTypeRefinement,
    HasValueSemantics,
--- a/lib/Conversion/TorchToTMTensor/TorchToTMTensor.cpp
+++ b/lib/Conversion/TorchToTMTensor/TorchToTMTensor.cpp
@ -305,6 +305,222 @@ public:
 };
 } // namespace

+namespace {
+// The original implementation of the op is as follows:
+//
+// Indices and GradOutput Layout: [N, C, H, W] or [C, H, W]
+// Input Layout: [N, C, Hin, Win] or [C, Hin, Win]
+//
+// for i in range(N):
+//   for j in range(C):
+//       for k in range(H):
+//           for l in range(W):
+//               index = indices[i, j, k, l]
+//               result[i, j, index/Win, index%Win] += gradOutput[i, j, k, l]
+//
+//                    OR
+//
+// for i in range(C):
+//   for j in range(H):
+//       for k in range(W):
+//           index = indices[i, j, k]
+//           result[i, index/Win, index%Win] += gradOutput[i, j, k]
+//
+class ConvertAtenMaxPool2dWithIndicesBackwardOp
+    : public OpConversionPattern<AtenMaxPool2dWithIndicesBackwardOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(AtenMaxPool2dWithIndicesBackwardOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (failed(verifyLinalgCompatibleTypes(op, rewriter)))
+      return failure();
+
+    Location loc = op.getLoc();
+    MLIRContext *context = op->getContext();
+    Value gradOutput = adaptor.grad_output();
+    Value input = adaptor.self();
+    RankedTensorType gradOutputType =
+        gradOutput.getType().cast<RankedTensorType>();
+    Type gradOutputElemType = gradOutputType.getElementType();
+    RankedTensorType inputType = input.getType().cast<RankedTensorType>();
+    Type inputElemType = inputType.getElementType();
+    int64_t tensorOperandRank = inputType.getRank();
+
+    // `TMTensor::ScatterOp` expects indices of element type i32.
+    Value indices = convertTensorToDtype(
+        rewriter, loc, op.indices(),
+        mlir::IntegerType::get(context, 32, mlir::IntegerType::Signed));
+    indices = typeConverter->materializeTargetConversion(
+        rewriter, loc, typeConverter->convertType(indices.getType()), indices);
+    RankedTensorType indicesType = indices.getType().cast<RankedTensorType>();
+    Type indicesElemType = indicesType.getElementType();
+
+    // The element type of the `input` and `grad_output` should be same.
+    if (inputElemType != gradOutputElemType)
+      return rewriter.notifyMatchFailure(
+          op,
+          "Input element type should be same as the grad_output element type.");
+
+    // Since the scatter op requires indices to be a 2-d tensor, we create a new
+    // 5-d/4-d tensor (depending on the original indices layout) comprising the
+    // index values. We will collapse this tensor into a 2-d tensor. The
+    // algorithm for the creation of updated indices tensor is as follows:
+    //
+    // for i in range(N):
+    //   for j in range(C):
+    //     for k in range(H):
+    //       for l in range(W):
+    //         for m in range(4):
+    //           if m == 0:
+    //             updatedIndices[N][C][H][W][0] = i
+    //           if m == 1:
+    //             updatedIndices[N][C][H][W][1] = j
+    //           if m == 2:
+    //             updatedIndices[N][C][H][W][2] =
+    //                                      originalIndices[i, j, k, l] / Win
+    //           if m == 3:
+    //             updatedIndices[N][C][H][W][3] =
+    //                                      originalIndices[i, j, k, l] % Win
+    //
+    //                  OR
+    //
+    //  for j in range(C):
+    //   for k in range(H):
+    //     for l in range(W):
+    //       for m in range(3):
+    //         if m == 0:
+    //           updatedIndices[C][H][W][0] = i
+    //         if m == 1:
+    //          updatedIndices[C][H][W][1] = originalIndices[i, j, k, l] / Win
+    //         if m == 2:
+    //           updatedIndices[C][H][W][2] = originalIndices[i, j, k, l] % Win
+
+    SmallVector<Value> inputShape = getTensorSizes(rewriter, loc, input);
+
+    SmallVector<AffineExpr> originalIndicesDimExprs, updatedIndicesDimExprs;
+    for (int64_t i = 0; i < tensorOperandRank; i++) {
+      originalIndicesDimExprs.push_back(rewriter.getAffineDimExpr(i));
+      updatedIndicesDimExprs.push_back(rewriter.getAffineDimExpr(i));
+    }
+    updatedIndicesDimExprs.push_back(
+        rewriter.getAffineDimExpr(tensorOperandRank));
+
+    SmallVector<AffineMap> indexingMaps = AffineMap::inferFromExprList(
+        {originalIndicesDimExprs, updatedIndicesDimExprs});
+    SmallVector<StringRef> iteratorTypes(tensorOperandRank + 1, "parallel");
+
+    SmallVector<OpFoldResult> updatedIndicesShape =
+        getAsOpFoldResult(getTensorSizes(rewriter, loc, indices));
+    updatedIndicesShape.push_back(rewriter.getIndexAttr(tensorOperandRank));
+
+    Value initTensor = rewriter.create<linalg::InitTensorOp>(
+        loc, updatedIndicesShape, indicesElemType);
+
+    Value wIn = inputShape[tensorOperandRank - 1];
+    SmallVector<Value> cstValues;
+    for (int64_t i = 0; i < tensorOperandRank; i++)
+      cstValues.push_back(rewriter.create<arith::ConstantIndexOp>(loc, i));
+
+    Value updatedIndices =
+        rewriter
+            .create<linalg::GenericOp>(
+                loc, initTensor.getType(), indices, initTensor, indexingMaps,
+                iteratorTypes,
+                [tensorOperandRank, wIn, cstValues,
+                 indicesElemType](OpBuilder &b, Location loc, ValueRange args) {
+                  Value index = castIntToIndex(b, loc, args[0]);
+                  Value updatedIndex = cstValues[0];
+                  Value lastDim =
+                      b.create<linalg::IndexOp>(loc, tensorOperandRank);
+
+                  for (int64_t i = tensorOperandRank - 1; i >= 0; i--) {
+                    Value result;
+                    if (i == tensorOperandRank - 1)
+                      result = b.create<arith::RemSIOp>(loc, index, wIn);
+                    if (i == tensorOperandRank - 2)
+                      result = b.create<arith::FloorDivSIOp>(loc, index, wIn);
+                    if (i == tensorOperandRank - 3 ||
+                        i == tensorOperandRank - 4)
+                      result = b.create<linalg::IndexOp>(loc, i);
+
+                    Value pred = b.create<arith::CmpIOp>(
+                        loc, arith::CmpIPredicate::eq, lastDim, cstValues[i]);
+                    Value addAmount = b.create<arith::SelectOp>(
+                        loc, pred, result, cstValues[0]);
+                    updatedIndex =
+                        b.create<arith::AddIOp>(loc, updatedIndex, addAmount);
+                  }
+
+                  updatedIndex = b.create<arith::IndexCastOp>(
+                      loc, indicesElemType, updatedIndex);
+                  b.create<linalg::YieldOp>(loc, updatedIndex);
+                })
+            .getResult(0);
+
+    // Creating a new tensor initialized with zeros and size same as the input
+    // tensor.
+    Value outputTensor =
+        createZeroInitTensor(rewriter, loc, inputShape, inputElemType);
+
+    // Collapsing `gradOutput` into a 1-d tensor.
+    SmallVector<ReassociationIndices> reassociationCollapse(1);
+    for (auto i = 0; i < gradOutputType.getRank(); i++)
+      reassociationCollapse[0].push_back(i);
+    RankedTensorType gradOutputFlattenedType;
+    int64_t numelGradOutput = getNumberOfElements(gradOutputType);
+    gradOutputFlattenedType =
+        RankedTensorType::get({numelGradOutput}, gradOutputElemType);
+    Value gradOutputFlattened = rewriter.create<tensor::CollapseShapeOp>(
+        loc, gradOutputFlattenedType, gradOutput, reassociationCollapse);
+
+    // Collapsing updated indices into a 2-d tensor.
+    SmallVector<ReassociationIndices> reassociationCollapseIndices(2);
+    for (auto i = 0; i < tensorOperandRank; i++)
+      reassociationCollapseIndices[0].push_back(i);
+    reassociationCollapseIndices[1].push_back(tensorOperandRank);
+    int64_t numelIndices = getNumberOfElements(indicesType);
+    Value indicesCollapsed = rewriter.create<tensor::CollapseShapeOp>(
+        loc,
+        RankedTensorType::get({numelIndices, tensorOperandRank},
+                              indicesElemType),
+        updatedIndices, reassociationCollapseIndices);
+
+    bool invalidInputTypeFound = false;
+    Value scatterOp = createTMTensorScatterOp(
+        rewriter, loc, /*updates=*/gradOutputFlattened,
+        /*indices=*/indicesCollapsed, /*original=*/outputTensor,
+        /*uniqueIndices=*/false,
+        [&](OpBuilder &b, Location loc, Value valuesElement,
+            Value inputElement) {
+          Value yieldValue = valuesElement;
+          if (inputElement.getType().isa<mlir::IntegerType>()) {
+            yieldValue =
+                b.create<arith::AddIOp>(loc, inputElement, valuesElement);
+          } else if (inputElement.getType().isa<mlir::FloatType>()) {
+            yieldValue =
+                b.create<arith::AddFOp>(loc, inputElement, valuesElement);
+          } else {
+            invalidInputTypeFound = true;
+            return;
+          }
+          b.create<TMTensor::YieldOp>(loc, yieldValue);
+        });
+
+    if (invalidInputTypeFound) {
+      return rewriter.notifyMatchFailure(
+          op,
+          "unimplemented: input tensor must be of integer type or float type");
+    }
+
+    Type newResultType = getTypeConverter()->convertType(op.getType());
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, scatterOp);
+
+    return success();
+  }
+};
+} // namespace
+
 // -----------------------------------------------------------------------------
 // The pass
 // -----------------------------------------------------------------------------
@ -339,6 +555,9 @@ public:
    target.addIllegalOp<ValsemVariantAtenIndexPutImplOp>();
    patterns.add<ConvertValsemVariantAtenIndexPutImplOp>(typeConverter,
                                                         context);
+    target.addIllegalOp<AtenMaxPool2dWithIndicesBackwardOp>();
+    patterns.add<ConvertAtenMaxPool2dWithIndicesBackwardOp>(typeConverter,
+                                                            context);

    if (failed(applyPartialConversion(getOperation(), target,
                                      std::move(patterns))))
--- a/lib/Conversion/Utils/Utils.cpp
+++ b/lib/Conversion/Utils/Utils.cpp
@ -272,6 +272,18 @@ Value convertScalarToDtype(OpBuilder &b, Location loc, Value scalar,
  llvm_unreachable("convertScalarToDtype should handle all the types");
 }

+// Return the number of elements of a tensor if the shape is static; otherwise,
+// return -1.
+int64_t getNumberOfElements(RankedTensorType inputType) {
+  if (!inputType.hasStaticShape())
+    return -1;
+  ArrayRef<int64_t> inputShape = inputType.getShape();
+  int64_t numel = 1;
+  for (int64_t i = 0; i < inputType.getRank(); i++)
+    numel *= inputShape[i];
+  return numel;
+}
+
 } // namespace Torch
 } // namespace torch
 } // namespace mlir
--- a/lib/Dialect/Torch/Transforms/RefineTypes.cpp
+++ b/lib/Dialect/Torch/Transforms/RefineTypes.cpp
@ -521,7 +521,7 @@ ChangeResult TypeAnalyzer::visitOperation(
  }

  // Take dtype from second operand.
-  if (isa<AtenNllLossBackwardOp>(op)) {
+  if (isa<AtenNllLossBackwardOp, AtenMaxPool2dWithIndicesBackwardOp>(op)) {
    auto self = operands[1]->getValue();
    auto knowledge =
        ValueKnowledge::getNotNonePessimisticValueState(op->getContext());
--- a/lib/Dialect/Torch/Transforms/ShapeLibrary.cpp
+++ b/lib/Dialect/Torch/Transforms/ShapeLibrary.cpp
@ -1572,6 +1572,9 @@ module {
    }
    return %none : !torch.none
  }
+  func @"__torch_mlir_shape_fn.aten.max_pool2d_with_indices_backward"(%arg0: !torch.list<int>, %arg1: !torch.list<int>, %arg2: !torch.list<int>, %arg3: !torch.list<int>, %arg4: !torch.list<int>, %arg5: !torch.list<int>, %arg6: !torch.bool, %arg7: !torch.list<int>) -> !torch.list<int> {
+    return %arg1 : !torch.list<int>
+  }
  func @"__torch_mlir_shape_fn.aten.adaptive_avg_pool2d"(%arg0: !torch.list<int>, %arg1: !torch.list<int>) -> !torch.list<int> {
    %0 = call @__torch__.torch_mlir.dialects.torch.importer.jit_ir.build_tools.upstream_shape_helpers.adaptive_avg_pool2d(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>
    return %0 : !torch.list<int>
--- a/python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/shape_lib_gen.py
+++ b/python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/shape_lib_gen.py
@ -563,6 +563,9 @@ def aten〇resize_(self: List[int], size: List[int], memory_format: Optional[int
 def aten〇max_pool2d(self: List[int], kernel_size: List[int], stride: List[int] = (), padding: List[int] = (0, 0), dilation: List[int] = (1, 1), ceil_mode: bool = False) -> List[int]:
    return upstream_shape_helpers.max_pool2d(self, kernel_size, stride, padding, dilation, ceil_mode)

+def aten〇max_pool2d_with_indices_backward(grad_output: List[int], self: List[int], kernel_size: List[int], stride: List[int], padding: List[int], dilation: List[int], ceil_mode: bool, indices: List[int]) -> List[int]:
+    return self
+
 def aten〇adaptive_avg_pool2d(self: List[int], output_size: List[int]) -> List[int]:
    return upstream_shape_helpers.adaptive_avg_pool2d(self, output_size)

--- a/python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/torch_ods_gen.py
+++ b/python/torch_mlir/dialects/torch/importer/jit_ir/build_tools/torch_ods_gen.py
@ -325,6 +325,9 @@ def emit_ops(emitter_td: TextEmitter, registry: Registry):
    emit(
        "aten::max_pool2d : (Tensor, int[], int[], int[], int[], bool) -> (Tensor)"
    )
+    emit(
+        "aten::max_pool2d_with_indices_backward : (Tensor, Tensor, int[], int[], int[], int[], bool, Tensor) -> (Tensor)"
+    )
    emit(
        "aten::softmax.int : (Tensor, int, int?) -> (Tensor)"
    )
--- a/python/torch_mlir_e2e_test/test_suite/basic.py
+++ b/python/torch_mlir_e2e_test/test_suite/basic.py