// RUN: npcomp-opt -lower-shaped-results-to-memref <%s -split-input-file | FileCheck %s --dump-input=fail

// Regression tests for the -lower-shaped-results-to-memref pass. Each case
// wraps a single tensor-level op in a tcp.shaped_results region and expects
// that region to be gone from the pass output.
//
// NOTE(review): the shape operands are typed tensor<?xindex> and the
// broadcast result is typed tensor<?x?xf32> (rank 2, matching the two loops
// expected below) -- confirm these against the tcp dialect definitions.

// Identity map over one dimension, used for both inputs and the output of the
// linalg.generic in the first case.
#map0 = affine_map<(d0) -> (d0)>

// Elementwise add expressed as linalg.generic on tensors. The expected output
// converts both tensor operands with tcp.tensor_to_memref, allocates the
// destination from the shape operand (%arg2) with tcp.alloc_memref, and feeds
// all three memrefs to linalg.generic.
// CHECK-LABEL: func @linalg_generic
func @linalg_generic(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<?xindex>) -> tensor<?xf32> {
  // CHECK: %[[LHS:.*]] = tcp.tensor_to_memref %arg0 : tensor<?xf32> -> memref<?xf32>
  // CHECK: %[[RHS:.*]] = tcp.tensor_to_memref %arg1 : tensor<?xf32> -> memref<?xf32>
  // CHECK: %[[DST:.*]] = tcp.alloc_memref %arg2 : memref<?xf32>
  // CHECK: linalg.generic {{.*}} %[[LHS]], %[[RHS]], %[[DST]]
  // CHECK-NOT: tcp.shaped_results
  %0 = tcp.shaped_results %arg2 {
    %0 = linalg.generic {args_in = 2 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} %arg0, %arg1 {
    ^bb0(%arg3: f32, %arg4: f32):
      %8 = addf %arg3, %arg4 : f32
      linalg.yield %8 : f32
    } : tensor<?xf32>, tensor<?xf32> -> tensor<?xf32>
    tcp.yield %0 : tensor<?xf32>
  } : tensor<?xindex> -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// tcp.broadcast_to wrapped in tcp.shaped_results. Only the presence of two
// scf.for ops and the absence of tcp.shaped_results are verified.
// CHECK-LABEL: func @tcp_broadcast_to
func @tcp_broadcast_to(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
  // Check for two nested loops, but don't look at more detail for now.
  // TODO: This pass should not create loops. Instead it should create a
  // buffer version of tcp.broadcast_to
  // CHECK: scf.for
  // CHECK: scf.for
  // CHECK-NOT: tcp.shaped_results
  %0 = tcp.shaped_results %arg1 {
    %0 = "tcp.broadcast_to"(%arg0, %arg1) : (tensor<?xf32>, tensor<?xindex>) -> tensor<?x?xf32>
    tcp.yield %0 : tensor<?x?xf32>
  } : tensor<?xindex> -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}