// RUN: torch-mlir-opt -split-input-file -tm-tensor-bufferize %s | FileCheck %s
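// Each test checks that -tm-tensor-bufferize rewrites a TMTensor op on tensors
// into the same op on memrefs: operands are converted with bufferization.to_memref,
// each result gets a fresh memref.alloc buffer (plus a memref.copy when the
// destination is read before being written), and results are converted back
// with bufferization.to_tensor.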
// -----
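// Inclusive scan: the running total starts from the first input element, so the
// incoming accumulator is never read and no memref.copy is expected, only the
// two allocations for the results.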
// CHECK-LABEL: func.func @scan_1d_inclusive(
// CHECK-SAME: %[[IN_TENSOR:.*]]: tensor<128xi32>, %[[OUT_TENSOR:.*]]: tensor<128xi32>,
// CHECK-SAME: %[[ACC_TENSOR:.*]]: tensor<i32>) -> (tensor<128xi32>, tensor<i32>) {
// CHECK: %[[IN_MEMREF:.*]] = bufferization.to_memref %[[IN_TENSOR]] : memref<128xi32>
// CHECK: %[[OUT_MEMREF_NEW:.*]] = memref.alloc() : memref<128xi32>
// CHECK: %[[ACC_MEMREF_NEW:.*]] = memref.alloc() : memref<i32>
// CHECK: tm_tensor.scan dimension(0) inclusive(true) ins(%[[IN_MEMREF]] : memref<128xi32>)
// CHECK-SAME: outs(%[[OUT_MEMREF_NEW]], %[[ACC_MEMREF_NEW]] : memref<128xi32>, memref<i32>) {
// CHECK: ^bb0(%[[OUT_PREV_ELEMENT:.*]]: i32, %[[IN_ELEMENT:.*]]: i32):
// CHECK: %[[OUT_CURRENT_ELEMENT:.*]] = arith.addi %[[OUT_PREV_ELEMENT]], %[[IN_ELEMENT]] : i32
// CHECK: tm_tensor.yield %[[OUT_CURRENT_ELEMENT]] : i32
// CHECK: }
// CHECK: %[[OUT_TENSOR_NEW:.*]] = bufferization.to_tensor %[[OUT_MEMREF_NEW]] : memref<128xi32>
// CHECK: %[[ACC_TENSOR_NEW:.*]] = bufferization.to_tensor %[[ACC_MEMREF_NEW]] : memref<i32>
// CHECK: return %[[OUT_TENSOR_NEW]], %[[ACC_TENSOR_NEW]] : tensor<128xi32>, tensor<i32>
func.func @scan_1d_inclusive(%in: tensor<128xi32>, %out: tensor<128xi32>, %acc: tensor<i32>) -> (tensor<128xi32>, tensor<i32>) {
  %ret_out, %ret_acc = tm_tensor.scan dimension(0) inclusive(true)
    ins(%in : tensor<128xi32>) outs(%out, %acc: tensor<128xi32>, tensor<i32>) {
    ^bb0(%arg0 : i32, %arg1 : i32):
      %sum = arith.addi %arg0, %arg1 : i32
      tm_tensor.yield %sum : i32
  } -> tensor<128xi32>, tensor<i32>
  return %ret_out, %ret_acc: tensor<128xi32>, tensor<i32>
}
// -----
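// Exclusive scan: the seed accumulator is read to produce the first output
// element, so in addition to the allocations bufferization must copy %acc into
// the new accumulator buffer (the memref.copy below) before running the scan.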
// CHECK-LABEL: func.func @scan_1d_exclusive(
// CHECK-SAME: %[[IN_TENSOR:.*]]: tensor<128xi32>, %[[OUT_TENSOR:.*]]: tensor<128xi32>,
// CHECK-SAME: %[[ACC_TENSOR:.*]]: tensor<i32>) -> (tensor<128xi32>, tensor<i32>) {
// CHECK: %[[IN_MEMREF:.*]] = bufferization.to_memref %[[IN_TENSOR]] : memref<128xi32>
// CHECK: %[[ACC_MEMREF:.*]] = bufferization.to_memref %[[ACC_TENSOR]] : memref<i32>
// CHECK: %[[OUT_MEMREF_NEW:.*]] = memref.alloc() : memref<128xi32>
// CHECK: %[[ACC_MEMREF_NEW:.*]] = memref.alloc() : memref<i32>
// CHECK: memref.copy %[[ACC_MEMREF]], %[[ACC_MEMREF_NEW]] : memref<i32> to memref<i32>
// CHECK: tm_tensor.scan dimension(0) inclusive(false) ins(%[[IN_MEMREF]] : memref<128xi32>)
// CHECK-SAME: outs(%[[OUT_MEMREF_NEW]], %[[ACC_MEMREF_NEW]] : memref<128xi32>, memref<i32>) {
// CHECK: ^bb0(%[[OUT_PREV_ELEMENT:.*]]: i32, %[[IN_ELEMENT:.*]]: i32):
// CHECK: %[[OUT_CURRENT_ELEMENT:.*]] = arith.addi %[[OUT_PREV_ELEMENT]], %[[IN_ELEMENT]] : i32
// CHECK: tm_tensor.yield %[[OUT_CURRENT_ELEMENT]] : i32
// CHECK: }
// CHECK: %[[OUT_TENSOR_NEW:.*]] = bufferization.to_tensor %[[OUT_MEMREF_NEW]] : memref<128xi32>
// CHECK: %[[ACC_TENSOR_NEW:.*]] = bufferization.to_tensor %[[ACC_MEMREF_NEW]] : memref<i32>
// CHECK: return %[[OUT_TENSOR_NEW]], %[[ACC_TENSOR_NEW]] : tensor<128xi32>, tensor<i32>
func.func @scan_1d_exclusive(%in: tensor<128xi32>, %out: tensor<128xi32>, %acc: tensor<i32>) -> (tensor<128xi32>, tensor<i32>) {
  %ret_out, %ret_acc = tm_tensor.scan dimension(0) inclusive(false)
    ins(%in : tensor<128xi32>) outs(%out, %acc: tensor<128xi32>, tensor<i32>) {
    ^bb0(%arg0 : i32, %arg1 : i32):
      %sum = arith.addi %arg0, %arg1 : i32
      tm_tensor.yield %sum : i32
  } -> tensor<128xi32>, tensor<i32>
  return %ret_out, %ret_acc: tensor<128xi32>, tensor<i32>
}
// -----
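// Scatter update: %original is overwritten only at the scattered indices, so
// its contents must be preserved; bufferization copies it into a fresh buffer
// that the scatter then updates in place.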
// CHECK-LABEL: func.func @scatter_update_scalar_1D(
// CHECK-SAME: %[[ORIG_TENSOR:.*]]: tensor<8xi32>,
// CHECK-SAME: %[[INDICES_TENSOR:.*]]: tensor<3x1xi32>,
// CHECK-SAME: %[[UPDATES_TENSOR:.*]]: tensor<3xi32>) -> tensor<8xi32> {
// CHECK: %[[UPDATES_MEMREF:.*]] = bufferization.to_memref %[[UPDATES_TENSOR]] : memref<3xi32>
// CHECK: %[[INDICES_MEMREF:.*]] = bufferization.to_memref %[[INDICES_TENSOR]] : memref<3x1xi32>
// CHECK: %[[ORIG_MEMREF:.*]] = bufferization.to_memref %[[ORIG_TENSOR]] : memref<8xi32>
// CHECK: %[[ORIG_MEMREF_NEW:.*]] = memref.alloc() : memref<8xi32>
// CHECK: memref.copy %[[ORIG_MEMREF]], %[[ORIG_MEMREF_NEW]] : memref<8xi32> to memref<8xi32>
// CHECK: tm_tensor.scatter unique_indices(true) ins(%[[UPDATES_MEMREF]], %[[INDICES_MEMREF]]
// CHECK-SAME: : memref<3xi32>, memref<3x1xi32>) outs(%[[ORIG_MEMREF_NEW]] : memref<8xi32>) {
// CHECK: ^bb0(%[[UPDATE_SCALAR:.*]]: i32, %[[ORIG_SCALAR:.*]]: i32):
// CHECK: tm_tensor.yield %[[UPDATE_SCALAR]] : i32
// CHECK: }
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ORIG_MEMREF_NEW]] : memref<8xi32>
// CHECK: return %[[OUT_TENSOR]] : tensor<8xi32>
func.func @scatter_update_scalar_1D(
    %original: tensor<8xi32>, %indices: tensor<3x1xi32>,
    %updates: tensor<3xi32>) -> tensor<8xi32> {
  %0 = tm_tensor.scatter unique_indices(true)
    ins(%updates, %indices : tensor<3xi32>, tensor<3x1xi32>)
    outs(%original : tensor<8xi32>) {
  ^bb0(%update: i32, %orig: i32):  // no predecessors
    tm_tensor.yield %update: i32
  } -> tensor<8xi32>
  return %0 : tensor<8xi32>
}

// -----
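// Scatter add: same bufferization pattern as the update test; the region body
// additionally reads the original element and adds a constant 1 to it before
// yielding.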
// CHECK-LABEL: func.func @scatter_add_scalar_1D(
// CHECK-SAME: %[[ORIG_TENSOR:.*]]: tensor<8xi32>,
// CHECK-SAME: %[[INDICES_TENSOR:.*]]: tensor<3x1xi32>,
// CHECK-SAME: %[[UPDATES_TENSOR:.*]]: tensor<3xi32>) -> tensor<8xi32> {
// CHECK: %[[UPDATES_MEMREF:.*]] = bufferization.to_memref %[[UPDATES_TENSOR]] : memref<3xi32>
// CHECK: %[[INDICES_MEMREF:.*]] = bufferization.to_memref %[[INDICES_TENSOR]] : memref<3x1xi32>
// CHECK: %[[ORIG_MEMREF:.*]] = bufferization.to_memref %[[ORIG_TENSOR]] : memref<8xi32>
// CHECK: %[[ORIG_MEMREF_NEW:.*]] = memref.alloc() : memref<8xi32>
// CHECK: memref.copy %[[ORIG_MEMREF]], %[[ORIG_MEMREF_NEW]] : memref<8xi32> to memref<8xi32>
// CHECK: tm_tensor.scatter unique_indices(true) ins(%[[UPDATES_MEMREF]], %[[INDICES_MEMREF]]
// CHECK-SAME: : memref<3xi32>, memref<3x1xi32>) outs(%[[ORIG_MEMREF_NEW]] : memref<8xi32>) {
// CHECK: ^bb0(%[[UPDATE_SCALAR:.*]]: i32, %[[ORIG_SCALAR:.*]]: i32):
// CHECK: %[[CST1:.*]] = arith.constant 1 : i32
// CHECK: %[[ADD:.*]] = arith.addi %[[ORIG_SCALAR]], %[[CST1]] : i32
// CHECK: tm_tensor.yield %[[ADD]] : i32
// CHECK: }
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ORIG_MEMREF_NEW]] : memref<8xi32>
// CHECK: return %[[OUT_TENSOR]] : tensor<8xi32>
func.func @scatter_add_scalar_1D(
    %original: tensor<8xi32>, %indices: tensor<3x1xi32>,
    %updates: tensor<3xi32>) -> tensor<8xi32> {
  %0 = tm_tensor.scatter unique_indices(true)
    ins(%updates, %indices : tensor<3xi32>, tensor<3x1xi32>)
    outs(%original : tensor<8xi32>) {
  ^bb0(%update: i32, %orig: i32):  // no predecessors
    %cst1 = arith.constant 1: i32
    %add = arith.addi %orig, %cst1: i32
    tm_tensor.yield %add: i32
  } -> tensor<8xi32>
  return %0 : tensor<8xi32>
}