// RUN: npcomp-opt -tcp-bufferize <%s | FileCheck %s
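
// Each test below checks that a tensor-level TCP op bufferizes to
// memref-based IR: the result buffer is allocated with refback.alloc_memref
// from a shape computed at runtime, and the op's computation is rewritten to
// write into that buffer.
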
// CHECK-LABEL: func @tcp_broadcast_to(
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?xf32>,
// CHECK-SAME: %[[SHAPE:.*]]: tensor<?xindex>) -> tensor<?x?xf32> {
// CHECK: refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
// Check for two nested loops, but don't inspect them in more detail for now.
// TODO: This pass should not create loops. Instead it should create a
// buffer version of tcp.broadcast_to.
// CHECK: scf.for
// CHECK: scf.for
func @tcp_broadcast_to(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
  %0 = tcp.broadcast_to %arg0, %arg1 : (tensor<?xf32>, tensor<?xindex>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}
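
// tcp.add bufferizes to an elementwise linalg.generic: the operands are
// converted with tensor_to_memref, the result buffer is allocated from the
// shape of the LHS (via shape.shape_of), and tensor_load converts the filled
// buffer back to a tensor for the return.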
// CHECK-LABEL: func @tcp_add(
// CHECK-SAME: %[[LHS_TENSOR:.*]]: tensor<?xf32>,
// CHECK-SAME: %[[RHS_TENSOR:.*]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[LHS:.*]] = tensor_to_memref %[[LHS_TENSOR]] : memref<?xf32>
// CHECK: %[[RHS:.*]] = tensor_to_memref %[[RHS_TENSOR]] : memref<?xf32>
// CHECK: %[[SHAPE:.*]] = shape.shape_of %[[LHS_TENSOR]] : tensor<?xf32> -> tensor<?xindex>
// CHECK: %[[RESULT:.*]] = refback.alloc_memref %[[SHAPE]] : memref<?xf32>
// CHECK: linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} ins(%[[LHS]], %[[RHS]] : memref<?xf32>, memref<?xf32>) outs(%[[RESULT]] : memref<?xf32>) {
// CHECK: ^bb0(%[[LHS_SCALAR:.*]]: f32, %[[RHS_SCALAR:.*]]: f32, %{{.*}}: f32):
// CHECK: %[[RESULT_SCALAR:.*]] = addf %[[LHS_SCALAR]], %[[RHS_SCALAR]] : f32
// CHECK: linalg.yield %[[RESULT_SCALAR]] : f32
// CHECK: }
// CHECK: %[[RESULT_TENSOR:.*]] = tensor_load %[[RESULT]] : memref<?xf32>
// CHECK: return %[[RESULT_TENSOR]] : tensor<?xf32>
// CHECK: }
func @tcp_add(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
  %0 = tcp.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
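
// tcp.matmul computes its result shape from dim ops (rows of the LHS,
// columns of the RHS), zero-fills the allocated result buffer with
// linalg.fill, and then accumulates into it with linalg.matmul.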
// CHECK-LABEL: func @tcp_matmul(
// CHECK-SAME: %[[LHS_TENSOR:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[RHS_TENSOR:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[LHS:.*]] = tensor_to_memref %[[LHS_TENSOR]] : memref<?x?xf32>
// CHECK: %[[RHS:.*]] = tensor_to_memref %[[RHS_TENSOR]] : memref<?x?xf32>
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[LHS_ROWS:.*]] = dim %[[LHS_TENSOR]], %[[C0]] : tensor<?x?xf32>
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[RHS_COLS:.*]] = dim %[[RHS_TENSOR]], %[[C1]] : tensor<?x?xf32>
// CHECK: %[[SHAPE:.*]] = tensor_from_elements %[[LHS_ROWS]], %[[RHS_COLS]] : tensor<2xindex>
// CHECK: %[[RESULT:.*]] = refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
// CHECK: %[[C0F32:.*]] = constant 0.000000e+00 : f32
// CHECK: linalg.fill(%[[RESULT]], %[[C0F32]]) : memref<?x?xf32>, f32
// CHECK: linalg.matmul ins(%[[LHS]], %[[RHS]] : memref<?x?xf32>, memref<?x?xf32>) outs(%[[RESULT]] : memref<?x?xf32>)
// CHECK: %[[RESULT_TENSOR:.*]] = tensor_load %[[RESULT]] : memref<?x?xf32>
// CHECK: return %[[RESULT_TENSOR]] : tensor<?x?xf32>
// CHECK: }
func @tcp_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = tcp.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}