[TorchToLinalg] Use `linalg.transpose` instead of `generic` in `permuteTensor` (#3872)

This PR changes the lowering to use `linalg.transpose` instead of
`linalg.generic` in `torch_to_linalg::permuteTensor`.
pull/3759/merge
Longsheng Mou 2024-11-15 17:13:14 +08:00 committed by GitHub
parent c26ca8b94d
commit 0a607a410d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 43 additions and 21 deletions

View File

@ -578,6 +578,12 @@ LogicalResult torch_to_linalg::permuteTensor(Operation *op,
int64_t inputRank = inType.getRank();
Type elementType = inType.getElementType();
// Check for 0-D tensor.
if (inputRank == 0) {
result = input;
return success();
}
// Check if the dimensions are a valid constants.
int64_t numDimensions = dimensions.size();
if (inputRank != numDimensions)
@ -596,28 +602,10 @@ LogicalResult torch_to_linalg::permuteTensor(Operation *op,
Value outVector = rewriter.create<tensor::EmptyOp>(
loc, getAsOpFoldResult(outputDims), elementType);
SmallVector<AffineExpr> idExprs;
SmallVector<AffineExpr> swapExprs;
for (uint32_t i = 0; i < inputRank; i++)
idExprs.push_back(getAffineDimExpr(i, rewriter.getContext()));
for (uint32_t i = 0; i < inputRank; i++)
swapExprs.push_back(idExprs[dimensions[i]]);
AffineMap inputMap =
AffineMap::get(inputRank, /*symbolCount=*/0, idExprs, op->getContext());
AffineMap outputMap =
AffineMap::get(inputRank, /*symbolCount=*/0, swapExprs, op->getContext());
SmallVector<AffineMap> indexingMaps{inputMap, outputMap};
SmallVector<utils::IteratorType> iteratorTypes(inputRank,
utils::IteratorType::parallel);
result = rewriter
.create<linalg::GenericOp>(
loc, outVector.getType(), input, outVector, indexingMaps,
iteratorTypes,
[](OpBuilder &b, Location loc, ValueRange args) {
b.create<linalg::YieldOp>(loc, args[0]);
})
.getResult(0);
result =
rewriter.create<linalg::TransposeOp>(loc, input, outVector, dimensions)
->getResult(0);
return success();
}

View File

@ -0,0 +1,34 @@
// RUN: torch-mlir-opt <%s -convert-torch-to-linalg -canonicalize -split-input-file -verify-diagnostics | FileCheck %s
// CHECK-LABEL: func.func @torch.aten.permute(
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[64,32,16,8,4],f32>) -> !torch.vtensor<[64,8,4,32,16],f32> {
// CHECK: %[[VAL_1:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[64,32,16,8,4],f32> -> tensor<64x32x16x8x4xf32>
// CHECK: %[[VAL_2:.*]] = tensor.empty() : tensor<64x8x4x32x16xf32>
// CHECK: %[[VAL_3:.*]] = linalg.transpose ins(%[[VAL_1]] : tensor<64x32x16x8x4xf32>) outs(%[[VAL_2]] : tensor<64x8x4x32x16xf32>) permutation = [0, 3, 4, 1, 2]
// CHECK: %[[VAL_4:.*]] = torch_c.from_builtin_tensor %[[VAL_3]] : tensor<64x8x4x32x16xf32> -> !torch.vtensor<[64,8,4,32,16],f32>
// CHECK: return %[[VAL_4]] : !torch.vtensor<[64,8,4,32,16],f32>
// CHECK: }
func.func @torch.aten.permute(%arg0: !torch.vtensor<[64,32,16,8,4],f32>) -> !torch.vtensor<[64,8,4,32,16],f32> {
%int0 = torch.constant.int 0
%int3 = torch.constant.int 3
%int4 = torch.constant.int 4
%int1 = torch.constant.int 1
%int2 = torch.constant.int 2
%0 = torch.prim.ListConstruct %int0, %int3, %int4, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1 = torch.aten.permute %arg0, %0 : !torch.vtensor<[64,32,16,8,4],f32>, !torch.list<int> -> !torch.vtensor<[64,8,4,32,16],f32>
return %1 : !torch.vtensor<[64,8,4,32,16],f32>
}
// -----
// CHECK-LABEL: func.func @torch.aten.permute$rank0(
// CHECK-SAME: %[[VAL_0:.*]]: !torch.vtensor<[],f32>) -> !torch.vtensor<[],f32> {
// CHECK: %[[VAL_1:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[],f32> -> tensor<f32>
// CHECK: %[[VAL_2:.*]] = torch_c.from_builtin_tensor %[[VAL_1]] : tensor<f32> -> !torch.vtensor<[],f32>
// CHECK: return %[[VAL_2]] : !torch.vtensor<[],f32>
// CHECK: }
func.func @torch.aten.permute$rank0(%arg0: !torch.vtensor<[],f32>) -> !torch.vtensor<[],f32> {
%0 = torch.prim.ListConstruct : () -> !torch.list<int>
%1 = torch.aten.permute %arg0, %0 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[],f32>
return %1 : !torch.vtensor<[],f32>
}