// RUN: npcomp-opt -tcp-bufferize <%s | FileCheck %s

// Regression test for the -tcp-bufferize pass: each function below feeds one
// tensor-level tcp op through the pass, and the directives above it pin the
// buffer-level IR that is expected to come out.
//
// NOTE(review): the tensor/memref type parameters were reconstructed. The
// rank-2 f32 types used for matmul follow from the 2-element shape tensor and
// the f32 fill constant. The rank-2 result of broadcast_to follows from the
// two nested loops. The rank-1 f32 input and the ?xindex shape operand of
// broadcast_to are assumptions -- confirm against the pass's actual output.

// broadcast_to: the result buffer is allocated from the shape operand.
// CHECK-LABEL:   func @tcp_broadcast_to(
// CHECK-SAME:                           %[[TENSOR:.*]]: tensor<?xf32>,
// CHECK-SAME:                           %[[SHAPE:.*]]: tensor<?xindex>) -> tensor<?x?xf32> {
// CHECK:           refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
// Check for two nested loops, but don't look at more detail for now.
// TODO: This pass should not create loops. Instead it should create a
// buffer version of tcp.broadcast_to
// CHECK:           scf.for
// CHECK:             scf.for
func @tcp_broadcast_to(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
  %0 = tcp.broadcast_to %arg0, %arg1 : (tensor<?xf32>, tensor<?xindex>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// matmul: the result shape is built from dim 0 of the LHS and dim 1 of the
// RHS, the result buffer is zero-filled, and linalg.matmul writes into it.
// CHECK-LABEL:   func @tcp_matmul(
// CHECK-SAME:                     %[[LHS_TENSOR:.*]]: tensor<?x?xf32>,
// CHECK-SAME:                     %[[RHS_TENSOR:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK:           %[[LHS:.*]] = tensor_to_memref %[[LHS_TENSOR]] : memref<?x?xf32>
// CHECK:           %[[RHS:.*]] = tensor_to_memref %[[RHS_TENSOR]] : memref<?x?xf32>
// CHECK:           %[[C0:.*]] = constant 0 : index
// CHECK:           %[[LHS_ROWS:.*]] = dim %[[LHS_TENSOR]], %[[C0]] : tensor<?x?xf32>
// CHECK:           %[[C1:.*]] = constant 1 : index
// CHECK:           %[[RHS_COLS:.*]] = dim %[[RHS_TENSOR]], %[[C1]] : tensor<?x?xf32>
// CHECK:           %[[SHAPE:.*]] = tensor_from_elements %[[LHS_ROWS]], %[[RHS_COLS]] : tensor<2xindex>
// CHECK:           %[[RESULT:.*]] = refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
// CHECK:           %[[C0F32:.*]] = constant 0.000000e+00 : f32
// CHECK:           linalg.fill(%[[RESULT]], %[[C0F32]]) : memref<?x?xf32>, f32
// CHECK:           linalg.matmul ins(%[[LHS]], %[[RHS]] : memref<?x?xf32>, memref<?x?xf32>) outs(%[[RESULT]] : memref<?x?xf32>)
// CHECK:           %[[RESULT_TENSOR:.*]] = tensor_load %[[RESULT]] : memref<?x?xf32>
// CHECK:           return %[[RESULT_TENSOR]] : tensor<?x?xf32>
// CHECK:         }
func @tcp_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = tcp.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}