Remove TCF and TCP.

These were legacy concepts that are now superseded by direct Torch to linalg-on-tensors lowering. These were based on some very early thinking related to the layering of frontends vs codegen, which is now obsolete because: - We expected a lot more centralization at the frontend (TCF) level. It turns out that frontend needs really vary a lot, and there is no grand unifying TCF dialect plausible. The additional layer isn't worth it. - Linalg-on-tensors obsoletes the primary need for TCP. There are still a few things not representable with linalg-on-tensors, but the support is growing and the whole "not included in linalg-on-tensors" direction needs to be rethought. Our TCP dialect didn't cover any of the actually important things in this space (such as sort, FFT, top-k, etc.). See historical [slides](https://drive.google.com/file/d/1iljcpTQ5NPaMfGpoPDFml1XkYxjK_6A4/view) / [recording](https://drive.google.com/file/d/1jSPa8TwPKUt0WuLquGc8OgSUVYJHMvWZ/view) for more details on the origin story here. Their presence was confusing users too [bug](https://github.com/llvm/mlir-npcomp/issues/248). Also, - Trim down npcomp-run-mlir testing. It was testing TCF to TCP lowering for the most part. The essential stuff is retained and rephrased with linalg-on-tensors. (we should probably rename it "refback-run" or something, as it is just a way to invoke RefBackend) - test/Python/Backend/RefJIT/simple_invoke_numpy.py is XFAIL'ed. Our "anti-framework" direction seems to be the likely future path.
2021-08-02 10:27:16 -07:00 · 2021-08-02 10:27:16 -07:00 · f168cacd6d
parent 7c788dbfec
commit f168cacd6d
93 changed files with 52 additions and 2581 deletions
--- a/include/npcomp/Conversion/NumpyToTCF/Passes.h
+++ b/include/npcomp/Conversion/NumpyToTCF/Passes.h
@ -1,21 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM
-// Exceptions. See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_CONVERSION_NUMPYTOTCF_PASSES_H
-#define NPCOMP_CONVERSION_NUMPYTOTCF_PASSES_H
-
-#include "mlir/Pass/Pass.h"
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-std::unique_ptr<OperationPass<FuncOp>> createConvertNumpyToTCFPass();
-}
-} // namespace mlir
-
-#endif // NPCOMP_CONVERSION_NUMPYTOTCF_PASSES_H
--- a/include/npcomp/Conversion/Passes.td
+++ b/include/npcomp/Conversion/Passes.td
@ -36,8 +36,8 @@ def ConvertTorchToLinalg : Pass<"convert-torch-to-linalg", "FuncOp"> {

    To model the termination of the program for implementing error guards,
    we use the `std.assert` op.
-    This is a design decision that is at variance from other passes in npcomp,
-    such as `convert-tcf-to-std` and `convert-tcf-to-linalg` which use the
+    This is a design decision that is at variance from other passes in the
+    ecosystem, which use the
    `shape` dialect's witness system (`shape.cstr_*` family of ops feeding into
    `shape.assuming` regions). This is a change in design decisions
    from those passes (which will be subsumed by this one). The reasons for this
@ -113,46 +113,4 @@ def ConvertBasicpyToStd : Pass<"convert-basicpy-to-std", "FuncOp"> {
  let constructor = "mlir::NPCOMP::createConvertBasicpyToStdPass()";
 }

-//===----------------------------------------------------------------------===//
-// Numpy conversions
-//===----------------------------------------------------------------------===//
-
-def ConvertNumpyToTCF : Pass<"convert-numpy-to-tcf", "FuncOp"> {
-  let summary = "Convert the numpy dialect to supported TCF ops";
-  let constructor = "mlir::NPCOMP::createConvertNumpyToTCFPass()";
-}
-
-//===----------------------------------------------------------------------===//
-// TCFToTCP
-//===----------------------------------------------------------------------===//
-
-def ConvertTCFToLinalg : Pass<"convert-tcf-to-linalg", "FuncOp"> {
-  let summary = "Convert TCF to Linalg";
-  let description = [{
-    The intention is for this pass to convert mainly to linalg named ops.
-
-    Because linalg is at the "TCP" layer of abstraction, this pass has to
-    concern itself with generating guards for error cases.
-  }];
-  let constructor = "mlir::NPCOMP::createConvertTCFToLinalgPass()";
-}
-
-//===----------------------------------------------------------------------===//
-// TCFToStd
-//===----------------------------------------------------------------------===//
-
-def ConvertTCFToStd : Pass<"convert-tcf-to-std", "FuncOp"> {
-  let summary = "Convert TCF to Std";
-  let constructor = "mlir::NPCOMP::createConvertTCFToStdPass()";
-}
-
-//===----------------------------------------------------------------------===//
-// TCFToTCP
-//===----------------------------------------------------------------------===//
-
-def ConvertTCFToTCP : Pass<"convert-tcf-to-tcp", "FuncOp"> {
-  let summary = "Convert TCF to TCP";
-  let constructor = "mlir::NPCOMP::createConvertTCFToTCPPass()";
-}
-
 #endif // NPCOMP_CONVERSION_PASSES
--- a/include/npcomp/Conversion/TCFToLinalg/TCFToLinalg.h
+++ b/include/npcomp/Conversion/TCFToLinalg/TCFToLinalg.h
@ -1,21 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_CONVERSION_TCFTOLINALG_TCFTOLINALG_H
-#define NPCOMP_CONVERSION_TCFTOLINALG_TCFTOLINALG_H
-
-#include "mlir/Pass/Pass.h"
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-std::unique_ptr<OperationPass<FuncOp>> createConvertTCFToLinalgPass();
-}
-} // namespace mlir
-
-#endif // NPCOMP_CONVERSION_TCFTOLINALG_TCFTOLINALG_H
--- a/include/npcomp/Conversion/TCFToStd/TCFToStd.h
+++ b/include/npcomp/Conversion/TCFToStd/TCFToStd.h
@ -1,21 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOSTD_H
-#define NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOSTD_H
-
-#include "mlir/Pass/Pass.h"
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-std::unique_ptr<OperationPass<FuncOp>> createConvertTCFToStdPass();
-}
-} // namespace mlir
-
-#endif // NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOSTD_H
--- a/include/npcomp/Conversion/TCFToTCP/TCFToTCP.h
+++ b/include/npcomp/Conversion/TCFToTCP/TCFToTCP.h
@ -1,21 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOTCP_H
-#define NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOTCP_H
-
-#include "mlir/Pass/Pass.h"
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-std::unique_ptr<OperationPass<FuncOp>> createConvertTCFToTCPPass();
-}
-} // namespace mlir
-
-#endif // NPCOMP_CONVERSION_TCFTOTCP_CONVERTTCFTOTCP_H
--- a/include/npcomp/Dialect/CMakeLists.txt
+++ b/include/npcomp/Dialect/CMakeLists.txt
@ -2,6 +2,4 @@ add_subdirectory(Basicpy)
 add_subdirectory(Numpy)
 add_subdirectory(Refback)
 add_subdirectory(Refbackrt)
-add_subdirectory(TCF)
-add_subdirectory(TCP)
 add_subdirectory(Torch)
--- a/include/npcomp/Dialect/TCF/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCF/CMakeLists.txt
@ -1,2 +0,0 @@
-add_subdirectory(IR)
-add_subdirectory(Transforms)
--- a/include/npcomp/Dialect/TCF/IR/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCF/IR/CMakeLists.txt
@ -1 +0,0 @@
-add_mlir_dialect(TCFOps tcf)
--- a/include/npcomp/Dialect/TCF/IR/TCFBase.td
+++ b/include/npcomp/Dialect/TCF/IR/TCFBase.td
@ -1,42 +0,0 @@
-//===-------------------------------------------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TCF_BASE
-#define TCF_BASE
-
-include "mlir/IR/OpBase.td"
-
-def TCF_Dialect : Dialect {
-  let name = "tcf";
-  let cppNamespace = "::mlir::NPCOMP::tcf";
-  let description = [{
-The `tcf` dialect is a key facilitator for ingesting into the MLIR ecosystem
-dynamic frontend languages with a "tensor" primitive type.
-
-Some of its key features are:
- Ops that safely report errors, such as mismatching sizes for a matrix
-  multiplication.
- Parameters controlling op behavior are dynamic operands, such as
-  convolution window sizes.
- Support for a rank-dynamic programming model.
- Support for implicit broadcasting, following the industry-standard numpy
-  broadcasting rules.
-
-These features make this dialect interoperate well with highly-dynamic
-programming models as are common in many frontends.
-
-This dialect is optimized for compiler analysis and transformation, especially
-lowering to lower levels of abstraction in the compiler.
-Tensor programs, as represented in this dialect, are not necessarily represented
-in the most efficient way for op-by-op execution.
-The goal is that most frontend ops are representable in a small, but
-not-necessarily-just-one set of ops from this dialect.
-  }];
-}
-
-#endif // #ifndef TCF_BASE
--- a/include/npcomp/Dialect/TCF/IR/TCFDialect.h
+++ b/include/npcomp/Dialect/TCF/IR/TCFDialect.h
@ -1,16 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCF_IR_TCFDIALECT_H
-#define NPCOMP_DIALECT_TCF_IR_TCFDIALECT_H
-
-#include "mlir/IR/Dialect.h"
-
-#include "npcomp/Dialect/TCF/IR/TCFOpsDialect.h.inc"
-
-#endif // NPCOMP_DIALECT_TCF_IR_TCFDIALECT_H
--- a/include/npcomp/Dialect/TCF/IR/TCFOps.h
+++ b/include/npcomp/Dialect/TCF/IR/TCFOps.h
@ -1,19 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCF_IR_TCFOPS_H
-#define NPCOMP_DIALECT_TCF_IR_TCFOPS_H
-
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/OpDefinition.h"
-#include "mlir/IR/OpImplementation.h"
-
-#define GET_OP_CLASSES
-#include "npcomp/Dialect/TCF/IR/TCFOps.h.inc"
-
-#endif // NPCOMP_DIALECT_TCF_IR_TCFOPS_H
--- a/include/npcomp/Dialect/TCF/IR/TCFOps.td
+++ b/include/npcomp/Dialect/TCF/IR/TCFOps.td
@ -1,119 +0,0 @@
-//===-------------------------------------------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TCF_OPS
-#define TCF_OPS
-
-include "npcomp/Dialect/TCF/IR/TCFBase.td"
-
-class TCF_Op<string mnemonic, list<OpTrait> traits = []>
-    : Op<TCF_Dialect, mnemonic, traits> {
-}
-
-// TODO: investigate effects framework for defining error semantics
-// TODO: define in a general way across the dialect what "encounters an error" means.
-
-class BinaryArithmeticOp<string mnemonic, list<OpTrait> traits = []> :
-  TCF_Op<mnemonic, traits> {
-  let arguments = (ins AnyTensor:$lhs, AnyTensor:$rhs);
-  let results = (outs AnyTensor:$result);
-  let assemblyFormat = "$lhs `,` $rhs attr-dict `:` functional-type(operands, results)";
-}
-
-def TCF_AddOp : BinaryArithmeticOp<"add"> {
-  let summary = "Addition of two tensors.";
-  let description = [{
-    Addition of two tensors.
-
-    Numpy-style broadcasting is allowed.
-  }];
-}
-
-def TCF_MaxOp : BinaryArithmeticOp<"max"> {
-  let summary = "Maximum of two tensors.";
-  let description = [{
-    Maximum of two tensors.
-
-    Numpy-style broadcasting is allowed.
-  }];
-}
-
-def TCF_MulOp : BinaryArithmeticOp<"mul"> {
-  let summary = "Multiply an input tensor by a scalar tensor.";
-  let description = [{
-    Multiplies each element of the input `input` with the scalar `other` and returns a new resulting tensor. The tensor types must match and shapes must be broadcastable.
-  }];
-}
-
-class UnaryArithmeticOp<string mnemonic, list<OpTrait> traits = []> :
-  TCF_Op<mnemonic,
-        !listconcat(traits, [AllTypesMatch<["operand", "result"]>])>,
-  AllTypesMatch<["operand", "result"]> {
-  let arguments = (ins AnyTensor:$operand);
-  let results = (outs AnyTensor:$result);
-  let assemblyFormat = "$operand attr-dict `:` type($operand)";
-}
-
-def TCF_ExpOp : UnaryArithmeticOp<"exp"> {
-  let summary = "base-e exponential";
-  let description = [{
-    See std.exp for more details.
-  }];
-}
-
-def TCF_TanhOp : UnaryArithmeticOp<"tanh"> {
-  let summary = "hyperbolic tangent";
-  let description = [{
-    See std.tanh for more details.
-  }];
-}
-
-// TODO: Generalize this op appropriately and add more verification.
-// For example, an unranked operand probably should be allowed and verified
-// dynamically in TCF->TCP lowering if needed.
-def TCF_MatmulOp : TCF_Op<"matmul"> {
-  let summary = "Performs a matrix multiplication";
-  let description = [{
-    Performs a matrix multiplication.
-
-    The tensors have dimensions:
-    - lhs: [M, K]
-    - rhs: [K, N]
-    - result: [M, N]
-
-    If the `K` dimension mismatches between the operands, this op aborts the
-    program.
-  }];
-  let arguments = (ins 2DTensorOf<[F32]>:$lhs, 2DTensorOf<[F32]>:$rhs);
-  let results = (outs 2DTensorOf<[F32]>:$result);
-
-  let assemblyFormat = "$lhs `,` $rhs attr-dict `:` functional-type(operands, results)";
-}
-
-def TCF_ConvNCHWOp : TCF_Op<"conv_2d_nchw"> {
-  let summary = "2-D convolution";
-  let description = [{
-    Performs 2-D convolution. This op is inspired by PyTorch's Conv2d layer (https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html).
-
-    The tensors have dimensions:
-    - in:     [N, Cin, H, W]
-    - filter: [Cout, Cin, KH, KW]
-    - result: [N, Cout, Hout, Wout]
-
-    The tensors must meet the following conditions; otherwise, this op aborts the program.
-    - H is greater than or equal to KH
-    - W is greater than or equal to KW
-    - Cin matches between in and filter
-  }];
-  let arguments = (ins 4DTensorOf<[F32]>:$in, 4DTensorOf<[F32]>:$filter);
-  let results = (outs 4DTensorOf<[F32]>:$result);
-
-  let assemblyFormat = "$in `,` $filter attr-dict `:` functional-type(operands, results)";
-}
-
-#endif // #ifndef TCF_OPS
--- a/include/npcomp/Dialect/TCF/Transforms/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCF/Transforms/CMakeLists.txt
@ -1,5 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Passes.td)
-mlir_tablegen(Passes.h.inc -gen-pass-decls)
-add_public_tablegen_target(NPCOMPTCFPassIncGen)
-
-add_mlir_doc(Passes NPCOMPTCFTransforms ./ -gen-pass-doc)
--- a/include/npcomp/Dialect/TCF/Transforms/Passes.h
+++ b/include/npcomp/Dialect/TCF/Transforms/Passes.h
@ -1,30 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCF_TRANSFORMS_PASSES_H
-#define NPCOMP_DIALECT_TCF_TRANSFORMS_PASSES_H
-
-#include "mlir/Pass/Pass.h"
-
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-namespace tcf {
-
-std::unique_ptr<OperationPass<FuncOp>> createShapeRefinementPass();
-
-} // namespace tcf
-
-/// Registers all TCF transformation passes.
-void registerTCFPasses();
-
-} // namespace NPCOMP
-} // namespace mlir
-
-#endif // NPCOMP_DIALECT_TCF_TRANSFORMS_PASSES_H
--- a/include/npcomp/Dialect/TCF/Transforms/Passes.td
+++ b/include/npcomp/Dialect/TCF/Transforms/Passes.td
@ -1,19 +0,0 @@
-//===-- Passes.td - Pass definition file -------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_TCF_PASSES
-#define NPCOMP_TCF_PASSES
-
-include "mlir/Pass/PassBase.td"
-
-def TCFShapeRefinement : Pass<"tcf-shape-refinement", "FuncOp"> {
-  let summary = "Refines shapes of tensors";
-  let constructor = "mlir::NPCOMP::tcf::createShapeRefinementPass()";
-}
-
-#endif // NPCOMP_TCF_PASSES
--- a/include/npcomp/Dialect/TCP/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCP/CMakeLists.txt
@ -1,2 +0,0 @@
-add_subdirectory(IR)
-add_subdirectory(Transforms)
--- a/include/npcomp/Dialect/TCP/IR/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCP/IR/CMakeLists.txt
@ -1 +0,0 @@
-add_mlir_dialect(TCPOps tcp)
--- a/include/npcomp/Dialect/TCP/IR/TCPBase.td
+++ b/include/npcomp/Dialect/TCP/IR/TCPBase.td
@ -1,40 +0,0 @@
-//===-------------------------------------------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TCP_BASE
-#define TCP_BASE
-
-include "mlir/IR/OpBase.td"
-
-def TCP_Dialect : Dialect {
-  let name = "tcp";
-  let cppNamespace = "::mlir::NPCOMP::tcp";
-  let description = [{
-The `tcp` dialect is the gateway to MLIR's code generation infrastructure.
-It is also a great place to do algebraic transformations making use of
-semantically-charged named ops.
-
-Features:
- Requires ranked tensors (except for a handful of special ops).
- No implicit broadcasting.
- Performance-critical parameters like convolution window sizes are represented
-  with attributes.
- Attention to detail modeling ops that are logically "pure" but have
-  preconditions.
-
-Together these features allow a relatively large class of "common-sense"
-optimizations to be done with only modestly complex considerations.
-// TODO: consider having these ops take a "witness" argument
-// that makes them truly NoSideEffect?
-// Or have a totally pure "tcp.island" op?
-// Figure it out when doing the tcf to tcp lowering.
-  }];
-}
-
-
-#endif // TCP_BASE
--- a/include/npcomp/Dialect/TCP/IR/TCPDialect.h
+++ b/include/npcomp/Dialect/TCP/IR/TCPDialect.h
@ -1,16 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCP_IR_TCPDIALECT_H
-#define NPCOMP_DIALECT_TCP_IR_TCPDIALECT_H
-
-#include "mlir/IR/Dialect.h"
-
-#include "npcomp/Dialect/TCP/IR/TCPOpsDialect.h.inc"
-
-#endif // NPCOMP_DIALECT_TCP_IR_TCPDIALECT_H
--- a/include/npcomp/Dialect/TCP/IR/TCPOps.h
+++ b/include/npcomp/Dialect/TCP/IR/TCPOps.h
@ -1,22 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCP_IR_TCPOPS_H
-#define NPCOMP_DIALECT_TCP_IR_TCPOPS_H
-
-#include "mlir/Dialect/Shape/IR/Shape.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/OpDefinition.h"
-#include "mlir/IR/OpImplementation.h"
-#include "mlir/IR/SymbolTable.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-
-#define GET_OP_CLASSES
-#include "npcomp/Dialect/TCP/IR/TCPOps.h.inc"
-
-#endif // NPCOMP_DIALECT_TCP_IR_TCPOPS_H
--- a/include/npcomp/Dialect/TCP/IR/TCPOps.td
+++ b/include/npcomp/Dialect/TCP/IR/TCPOps.td
@ -1,77 +0,0 @@
-//===-------------------------------------------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TCP_OPS
-#define TCP_OPS
-
-include "npcomp/Dialect/TCP/IR/TCPBase.td"
-include "mlir/Dialect/Shape/IR/ShapeBase.td"
-include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/InferTypeOpInterface.td"
-include "mlir/Interfaces/ControlFlowInterfaces.td"
-include "mlir/IR/SymbolInterfaces.td"
-
-class TCP_Op<string mnemonic, list<OpTrait> traits = []>
-    : Op<TCP_Dialect, mnemonic, traits> {
-}
-
-def TCP_BroadcastToOp : TCP_Op<"broadcast_to"> {
-  let summary = "Broadcasts an operand to a given shape.";
-  let description = [{
-Broadcasts `operand` to the shape `shape`.
-
-It is undefined behavior if such a broadcast is not legal.
-  }];
-  let arguments = (ins AnyRankedTensor:$operand, Shape_ExtentTensorType:$shape);
-  let results = (outs AnyRankedTensor:$result);
-
-  let assemblyFormat = "$operand `,` $shape attr-dict `:` functional-type(operands, results)";
-}
-
-def TCP_SplattedOp : TCP_Op<"splatted"> {
-  let summary = "Creates a tensor filled with a particular scalar value.";
-  let description = [{
-    Creates a tensor of shape `shape` with all elements filled with `splatVal`.
-
-    This op is somewhat redundant with tcp.broadcast_to. However,
-    tcp.broadcast_to handles degenerate "size-1" broadcasting which structurally
-    cannot happen with this op. So to avoid losing that information, we keep
-    this op separate.
-
-    NOTE: The name "splatted" separates it from std.splat, which currently
-    only handles statically shaped memrefs.
-
-    TODO: Improve std.splat to take dynamic shapes.
-  }];
-  let arguments = (ins AnyType:$splatVal, Shape_ExtentTensorType:$shape);
-  let results = (outs AnyRankedTensor:$result);
-
-  let assemblyFormat = "$splatVal `,` $shape attr-dict `:` functional-type(operands, results)";
-}
-
-def TCP_PadOp : TCP_Op<"pad"> {
-  let summary = "Pads a tensor with a fill value";
-  let description = [{
-    Pads a tensor with `fillVal` along the borders of each dimension according
-    to `lowerExpansion` and `upperExpansion`. Note that this op is unmanaged,
-    meaning that it assumes its operands and their shapes are valid.
-
-    The tensors have dimensions:
-    - operand:   [D1, D2, ..., DN]
-    - lowerExpansion: [L1, L2, ..., LN]
-    - upperExpansion: [U1, U2, ..., UN]
-    - fillVal:   scalar
-    - result:    [D1+L1+U1, D2+L2+U2, ..., DN+LN+UN]
-  }];
-  let arguments = (ins AnyRankedTensor:$operand, Shape_ExtentTensorType:$lowerExpansion, Shape_ExtentTensorType:$upperExpansion, AnyType:$fillVal);
-  let results = (outs AnyRankedTensor:$result);
-
-  let assemblyFormat = "$operand `,` $lowerExpansion `,` $upperExpansion `,` $fillVal attr-dict `:` functional-type(operands, results)";
-}
-
-#endif // TCP_OPS
--- a/include/npcomp/Dialect/TCP/Transforms/CMakeLists.txt
+++ b/include/npcomp/Dialect/TCP/Transforms/CMakeLists.txt
@ -1,5 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Passes.td)
-mlir_tablegen(Passes.h.inc -gen-pass-decls)
-add_public_tablegen_target(NPCOMPTCPPassIncGen)
-
-add_mlir_doc(Passes NPCOMPTCPTransforms ./ -gen-pass-doc)
--- a/include/npcomp/Dialect/TCP/Transforms/Passes.h
+++ b/include/npcomp/Dialect/TCP/Transforms/Passes.h
@ -1,27 +0,0 @@
-//===------------------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCP_TRANSFORMS_PASSES_H
-#define NPCOMP_DIALECT_TCP_TRANSFORMS_PASSES_H
-
-#include "mlir/Pass/Pass.h"
-
-#include <memory>
-
-namespace mlir {
-namespace NPCOMP {
-
-std::unique_ptr<OperationPass<FuncOp>> createTCPBufferizePass();
-
-/// Registers all TCP transformation passes.
-void registerTCPPasses();
-
-} // namespace NPCOMP
-} // namespace mlir
-
-#endif // NPCOMP_DIALECT_TCP_TRANSFORMS_PASSES_H
--- a/include/npcomp/Dialect/TCP/Transforms/Passes.td
+++ b/include/npcomp/Dialect/TCP/Transforms/Passes.td
@ -1,19 +0,0 @@
-//===-- Passes.td - Pass definition file -------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_TCP_PASSES
-#define NPCOMP_TCP_PASSES
-
-include "mlir/Pass/PassBase.td"
-
-def TCPBufferize : Pass<"tcp-bufferize", "FuncOp"> {
-  let summary = "Bufferizes the tcp dialect";
-  let constructor = "mlir::NPCOMP::createTCPBufferizePass()";
-}
-
-#endif // NPCOMP_TCP_PASSES
--- a/include/npcomp/RefBackend/RefBackend.h
+++ b/include/npcomp/RefBackend/RefBackend.h
@ -45,25 +45,6 @@ struct RefBackendLoweringPipelineOptions
 void createRefBackendLoweringPipeline(
    OpPassManager &pm, const RefBackendLoweringPipelineOptions &options);

-// Helper pipeline that runs TCF->TCP lowering.
-//
-// For now, just piggy-back on the same set of options since this is such a
-// simple set of passes.
-//
-// TODO: Move this out of RefBackend once the TCF->TCP conversions
-// become more substantial.
-void createRefBackendTCFToTCPPipeline(
-    OpPassManager &pm, const RefBackendLoweringPipelineOptions &options);
-
-// Helper pipeline that runs TCF->TCP lowering before invoking
-// RefBackendLoweringPipeline.
-// For now, just piggy-back on the same set of options since this is such a
-// thin wrapper.
-// Longer-term, the reference backend should fit into some sort of
-// "target interface" and this helper won't be needed.
-void createTCFRefBackendLoweringPipeline(
-    OpPassManager &pm, const RefBackendLoweringPipelineOptions &options);
-
 } // namespace NPCOMP
 } // namespace mlir

--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@ -31,8 +31,6 @@ add_npcomp_library(NPCOMPInitAll
  NPCOMPIREEBackend
  NPCOMPRefBackend
  NPCOMPRefbackDialect
-  NPCOMPTCPDialect
-  NPCOMPTCFDialect
  NPCOMPTorchDialect
  NPCOMPRefbackrtDialect
  NPCOMPBasicpyDialect
@ -40,7 +38,6 @@ add_npcomp_library(NPCOMPInitAll
  NPCOMPConversionPasses
  NPCOMPNumpyDialect
  NPCOMPNumpyPasses
-  NPCOMPTCFPasses
  NPCOMPTypingPasses

  # TODO: We shouldn't need npcomp_conversion_libs here, but we have
--- a/lib/Conversion/CMakeLists.txt
+++ b/lib/Conversion/CMakeLists.txt
@ -2,10 +2,6 @@ add_subdirectory(TorchToLinalg)
 add_subdirectory(TorchToSCF)
 add_subdirectory(TorchToStd)
 add_subdirectory(BasicpyToStd)
-add_subdirectory(NumpyToTCF)
-add_subdirectory(TCFToLinalg)
-add_subdirectory(TCFToStd)
-add_subdirectory(TCFToTCP)

 get_property(npcomp_conversion_libs GLOBAL PROPERTY NPCOMP_CONVERSION_LIBS)

--- a/lib/Conversion/NumpyToTCF/CMakeLists.txt
+++ b/lib/Conversion/NumpyToTCF/CMakeLists.txt
@ -1,16 +0,0 @@
-add_npcomp_conversion_library(NPCOMPNumpyToTCF
-  Passes.cpp
-
-  DEPENDS
-  NPCOMPConversionPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  MLIRTransforms
-  NPCOMPBasicpyDialect
-  NPCOMPNumpyDialect
-)
--- a/lib/Conversion/NumpyToTCF/Passes.cpp
+++ b/lib/Conversion/NumpyToTCF/Passes.cpp
@ -1,66 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Conversion/NumpyToTCF/Passes.h"
-
-#include "../PassDetail.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "npcomp/Dialect/Numpy/IR/NumpyOps.h"
-#include "npcomp/Dialect/TCF/IR/TCFDialect.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-
-namespace {
-template <typename TargetTcfOp>
-class ConvertBinaryBuiltinUfuncCallOp
-    : public OpRewritePattern<Numpy::BuiltinUfuncCallOp> {
-public:
-  ConvertBinaryBuiltinUfuncCallOp(MLIRContext *context, StringRef qualifiedName,
-                                  PatternBenefit benefit = 1)
-      : OpRewritePattern(context, benefit), qualifiedName(qualifiedName) {}
-  LogicalResult matchAndRewrite(Numpy::BuiltinUfuncCallOp op,
-                                PatternRewriter &rewriter) const override {
-    if (op.qualified_name() != qualifiedName)
-      return failure();
-    if (op.inputs().size() != 2)
-      return failure();
-
-    rewriter.replaceOpWithNewOp<TargetTcfOp>(op, op.getResult().getType(),
-                                             op.inputs()[0], op.inputs()[1]);
-    return success();
-  }
-
-private:
-  StringRef qualifiedName;
-};
-} // namespace
-
-namespace {
-class ConvertNumpyToTCF : public ConvertNumpyToTCFBase<ConvertNumpyToTCF> {
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<NPCOMP::tcf::TCFDialect>();
-  }
-
-  void runOnOperation() override {
-    FuncOp func = getOperation();
-    MLIRContext *context = &getContext();
-
-    RewritePatternSet patterns(context);
-    patterns.add<ConvertBinaryBuiltinUfuncCallOp<tcf::AddOp>>(context,
-                                                              "numpy.add");
-    (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
-  }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>>
-mlir::NPCOMP::createConvertNumpyToTCFPass() {
-  return std::make_unique<ConvertNumpyToTCF>();
-}
--- a/lib/Conversion/Passes.cpp
+++ b/lib/Conversion/Passes.cpp
@ -9,10 +9,6 @@
 #include "npcomp/Conversion/Passes.h"

 #include "npcomp/Conversion/BasicpyToStd/Passes.h"
-#include "npcomp/Conversion/NumpyToTCF/Passes.h"
-#include "npcomp/Conversion/TCFToLinalg/TCFToLinalg.h"
-#include "npcomp/Conversion/TCFToStd/TCFToStd.h"
-#include "npcomp/Conversion/TCFToTCP/TCFToTCP.h"
 #include "npcomp/Conversion/TorchToLinalg/TorchToLinalg.h"
 #include "npcomp/Conversion/TorchToSCF/TorchToSCF.h"
 #include "npcomp/Conversion/TorchToStd/TorchToStd.h"
--- a/lib/Conversion/TCFToLinalg/CMakeLists.txt
+++ b/lib/Conversion/TCFToLinalg/CMakeLists.txt
@ -1,20 +0,0 @@
-add_npcomp_conversion_library(NPCOMPTCFToLinalg
-  TCFToLinalg.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Conversion/TCFToLinalg
-
-  DEPENDS
-  NPCOMPConversionPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  MLIRTransforms
-  MLIRShape
-  MLIRTensor
-  NPCOMPTCFDialect
-)
--- a/lib/Conversion/TCFToLinalg/TCFToLinalg.cpp
+++ b/lib/Conversion/TCFToLinalg/TCFToLinalg.cpp
@ -1,242 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Conversion/TCFToLinalg/TCFToLinalg.h"
-
-#include "../PassDetail.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/Shape/IR/Shape.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
-#include "mlir/Dialect/Traits.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-
-static SmallVector<Value, 6> bypassResultShapes(Operation *op,
-                                                OpBuilder &builder) {
-
-  if (auto matmul = dyn_cast<tcf::MatmulOp>(op)) {
-    auto lhsRows = builder.create<tensor::DimOp>(op->getLoc(), matmul.lhs(), 0);
-    auto rhsCols = builder.create<tensor::DimOp>(op->getLoc(), matmul.rhs(), 1);
-    auto shape = builder.create<tensor::FromElementsOp>(
-        op->getLoc(), ValueRange({lhsRows, rhsCols}));
-    return {shape};
-  }
-  // TODO: This only supports the NCHW data format. Consider other formats and
-  // lower ranks.
-  if (auto conv2dNCHW = dyn_cast<tcf::ConvNCHWOp>(op)) {
-    // TODO: Replace hard-coded stride/dilation/padding constant-ops.
-    // TODO: Consider migrating this SSA shape-computing graph to a complex op
-    // or use the `mlir-linalg-ods-gen` approach and define a `*.tc` spec file.
-    auto cI0 = builder.create<ConstantOp>(
-        op->getLoc(), builder.getIntegerAttr(builder.getIndexType(), 0));
-    auto cI1 = builder.create<ConstantOp>(
-        op->getLoc(), builder.getIntegerAttr(builder.getIndexType(), 1));
-    auto cI2 = builder.create<ConstantOp>(
-        op->getLoc(), builder.getIntegerAttr(builder.getIndexType(), 2));
-    auto stride = cI1;
-    auto dilation = cI1;
-    auto padding = cI0;
-    auto strideHeight = stride;
-    auto strideWidth = stride;
-    auto dilationHeight = dilation;
-    auto dilationWidth = dilation;
-    auto paddingHeight = padding;
-    auto paddingWidth = padding;
-    auto batch =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.in(), 0);
-    auto height =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.in(), 2);
-    auto width =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.in(), 3);
-    auto filterOutChannels =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.filter(), 0);
-    auto filterHeight =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.filter(), 2);
-    auto filterWidth =
-        builder.create<tensor::DimOp>(op->getLoc(), conv2dNCHW.filter(), 3);
-    // Output height
-    auto twicePaddingHeight =
-        builder.create<MulIOp>(op->getLoc(), paddingHeight, cI2);
-    auto heightPlusTwicePadding =
-        builder.create<SubIOp>(op->getLoc(), height, twicePaddingHeight);
-    auto filterHeightMinusOne =
-        builder.create<SubIOp>(op->getLoc(), filterHeight, cI1);
-    auto dilationFilterHeight = builder.create<MulIOp>(
-        op->getLoc(), dilationHeight, filterHeightMinusOne);
-    auto outHeightUnstridedPlusOne = builder.create<SubIOp>(
-        op->getLoc(), heightPlusTwicePadding, dilationFilterHeight);
-    auto outHeightUnstrided =
-        builder.create<SubIOp>(op->getLoc(), outHeightUnstridedPlusOne, cI1);
-    auto outHeightMinusOne = builder.create<UnsignedDivIOp>(
-        op->getLoc(), outHeightUnstrided, strideHeight);
-    auto outHeight =
-        builder.create<AddIOp>(op->getLoc(), outHeightMinusOne, cI1);
-    // Output width
-    auto twicePaddingWidth =
-        builder.create<MulIOp>(op->getLoc(), paddingWidth, cI2);
-    auto widthPlusTwicePadding =
-        builder.create<SubIOp>(op->getLoc(), width, twicePaddingWidth);
-    auto filterWidthMinusOne =
-        builder.create<SubIOp>(op->getLoc(), filterWidth, cI1);
-    auto dilationFilterWidth = builder.create<MulIOp>(
-        op->getLoc(), dilationWidth, filterWidthMinusOne);
-    auto outWidthUnstridedPlusOne = builder.create<SubIOp>(
-        op->getLoc(), widthPlusTwicePadding, dilationFilterWidth);
-    auto outWidthUnstrided =
-        builder.create<SubIOp>(op->getLoc(), outWidthUnstridedPlusOne, cI1);
-    auto outWidthMinusOne = builder.create<UnsignedDivIOp>(
-        op->getLoc(), outWidthUnstrided, strideWidth);
-    auto outWidth = builder.create<AddIOp>(op->getLoc(), outWidthMinusOne, cI1);
-    // Output shape
-    auto shape = builder.create<tensor::FromElementsOp>(
-        op->getLoc(),
-        ValueRange({batch, filterOutChannels, outHeight, outWidth}));
-    return {shape};
-  }
-
-  // No shape transfer function.
-  return {};
-}
-
-namespace {
-class ConvertMatmul : public OpRewritePattern<tcf::MatmulOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(tcf::MatmulOp op,
-                                PatternRewriter &rewriter) const override {
-    // Create the constraints, and the assuming region.
-    Value lhsK = rewriter.create<tensor::DimOp>(op.getLoc(), op.lhs(), 1);
-    Value rhsK = rewriter.create<tensor::DimOp>(op.getLoc(), op.rhs(), 0);
-    Value matchingK =
-        rewriter.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, lhsK, rhsK);
-    Value witness = rewriter.create<shape::CstrRequireOp>(
-        op.getLoc(), matchingK, "mismatching contracting dimension for matmul");
-    auto assuming = rewriter.create<shape::AssumingOp>(
-        op.getLoc(), ArrayRef<Type>{op.getType()}, witness);
-
-    // Build the region body.
-    rewriter.createBlock(&assuming.doRegion());
-    // Create the init tensor for the matmul.
-    // TODO: Expand supported data types.
-    Value c0 =
-        rewriter.create<ConstantOp>(op.getLoc(), rewriter.getF32FloatAttr(0.0));
-    Value shape = bypassResultShapes(op, rewriter)[0];
-    Value initTensor =
-        rewriter.create<tcp::SplattedOp>(op.getLoc(), op.getType(), c0, shape);
-
-    // Create the matmul.
-    auto matmul = rewriter.create<linalg::MatmulOp>(
-        op.getLoc(), TypeRange(op.getType()), op.getOperands(),
-        ValueRange(initTensor));
-    rewriter.create<shape::AssumingYieldOp>(op.getLoc(), matmul.getResult(0));
-
-    // Finally, replace with the results of the shape.assuming
-    rewriter.replaceOp(op, assuming.getResults());
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-class ConvertConvNCHW : public OpRewritePattern<tcf::ConvNCHWOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(tcf::ConvNCHWOp op,
-                                PatternRewriter &rewriter) const override {
-    // Create the constraints, and the assuming region.
-    Value inputCin = rewriter.create<tensor::DimOp>(op.getLoc(), op.in(), 1);
-    Value inputH = rewriter.create<tensor::DimOp>(op.getLoc(), op.in(), 2);
-    Value inputW = rewriter.create<tensor::DimOp>(op.getLoc(), op.in(), 3);
-    Value filterCin =
-        rewriter.create<tensor::DimOp>(op.getLoc(), op.filter(), 1);
-    Value filterKH =
-        rewriter.create<tensor::DimOp>(op.getLoc(), op.filter(), 2);
-    Value filterKW =
-        rewriter.create<tensor::DimOp>(op.getLoc(), op.filter(), 3);
-    Value matchingCin = rewriter.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq,
-                                                inputCin, filterCin);
-    Value validFilterH = rewriter.create<CmpIOp>(
-        op.getLoc(), CmpIPredicate::uge, inputH, filterKH);
-    Value validFilterW = rewriter.create<CmpIOp>(
-        op.getLoc(), CmpIPredicate::uge, inputW, filterKW);
-    Value witnessCin = rewriter.create<shape::CstrRequireOp>(
-        op.getLoc(), matchingCin, "input and filter in-channels must be equal");
-    Value witnessFilterH = rewriter.create<shape::CstrRequireOp>(
-        op.getLoc(), validFilterH,
-        "input height must be greater than or equal to filter KH-dimension");
-    Value witnessFilterW = rewriter.create<shape::CstrRequireOp>(
-        op.getLoc(), validFilterW,
-        "input width must be greater than or equal to filter KW-dimension");
-    Value assumingAll = rewriter.create<shape::AssumingAllOp>(
-        op.getLoc(), witnessCin.getType(),
-        ValueRange({witnessCin, witnessFilterH, witnessFilterW}));
-    auto assuming = rewriter.create<shape::AssumingOp>(
-        op.getLoc(), ArrayRef<Type>{op.getType()}, assumingAll);
-
-    // Build the region body.
-    rewriter.createBlock(&assuming.doRegion());
-    // Create the init tensor for the ConvNCHW.
-    // TODO: Expand supported data types.
-    Value c0 =
-        rewriter.create<ConstantOp>(op.getLoc(), rewriter.getF32FloatAttr(0.0));
-    Value shape = bypassResultShapes(op, rewriter)[0];
-    Value initTensor =
-        rewriter.create<tcp::SplattedOp>(op.getLoc(), op.getType(), c0, shape);
-
-    // Unit strides and dilations.
-    auto strides = rewriter.getI64VectorAttr({1, 1});
-    auto dilations = rewriter.getI64VectorAttr({1, 1});
-
-    // Create the ConvNCHW.
-    auto conv2dNCHW = rewriter.create<linalg::Conv2DNchwOp>(
-        op.getLoc(), TypeRange(op.getType()),
-        ValueRange({op.in(), op.filter()}), ValueRange(initTensor), strides,
-        dilations);
-    rewriter.create<shape::AssumingYieldOp>(op.getLoc(),
-                                            conv2dNCHW.getResults());
-
-    // Finally, replace with the results of the shape.assuming
-    rewriter.replaceOp(op, assuming.getResults());
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-class ConvertTCFToLinalg : public ConvertTCFToLinalgBase<ConvertTCFToLinalg> {
-public:
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<shape::ShapeDialect, tcp::TCPDialect, tensor::TensorDialect,
-                    memref::MemRefDialect, linalg::LinalgDialect>();
-  }
-
-  void runOnOperation() override {
-    (void)applyPatternsAndFoldGreedily(getOperation(), getPatterns());
-  }
-
-  FrozenRewritePatternSet getPatterns() {
-    MLIRContext *context = &getContext();
-    RewritePatternSet patterns(context);
-    patterns.add<ConvertMatmul>(context);
-    patterns.add<ConvertConvNCHW>(context);
-    return std::move(patterns);
-  }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>>
-mlir::NPCOMP::createConvertTCFToLinalgPass() {
-  return std::make_unique<ConvertTCFToLinalg>();
-}
--- a/lib/Conversion/TCFToStd/CMakeLists.txt
+++ b/lib/Conversion/TCFToStd/CMakeLists.txt
@ -1,21 +0,0 @@
-add_npcomp_conversion_library(NPCOMPTCFToStd
-  TCFToStd.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Conversion/TCFToStd
-
-  DEPENDS
-  NPCOMPConversionPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  MLIRTransforms
-  MLIRShape
-  MLIRStandard
-  MLIRLinalg
-  NPCOMPTCFDialect
-)
--- a/lib/Conversion/TCFToStd/TCFToStd.cpp
+++ b/lib/Conversion/TCFToStd/TCFToStd.cpp
@ -1,162 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Conversion/TCFToStd/TCFToStd.h"
-
-#include "../PassDetail.h"
-#include "mlir/Dialect/Math/IR/Math.h"
-#include "mlir/Dialect/Shape/IR/Shape.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Traits.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-
-static RankedTensorType getExtentTensorType(Builder &builder) {
-  return RankedTensorType::get({ShapedType::kDynamicSize},
-                               builder.getIndexType());
-}
-
-// Non-templated version of the body of ConvertBinaryElementwise to keep things
-// simple.
-static LogicalResult
-matchAndRewriteBinaryElementwise(Operation *op, PatternRewriter &rewriter) {
-  Value lhs = op->getOperand(0);
-  Value rhs = op->getOperand(1);
-  Location loc = op->getLoc();
-  Value result = op->getResult(0);
-
-  auto lhsType = lhs.getType().dyn_cast<RankedTensorType>();
-  auto rhsType = rhs.getType().dyn_cast<RankedTensorType>();
-  if (!lhsType || !rhsType)
-    return rewriter.notifyMatchFailure(op, "requires ranked tensors");
-
-  Value lhsShape = rewriter.create<shape::ShapeOfOp>(loc, lhs);
-  Value rhsShape = rewriter.create<shape::ShapeOfOp>(loc, rhs);
-
-  // Create the constraints, and the assuming region.
-  Value witness =
-      rewriter.create<shape::CstrBroadcastableOp>(loc, lhsShape, rhsShape);
-  auto assuming = rewriter.create<shape::AssumingOp>(
-      loc, ArrayRef<Type>{result.getType()}, witness);
-
-  // Start building the region body.
-  rewriter.createBlock(&assuming.doRegion());
-  Value broadcastedShape = rewriter.create<shape::BroadcastOp>(
-      loc, getExtentTensorType(rewriter), lhsShape, rhsShape,
-      /*error=*/nullptr);
-
-  // TODO: It's annoying to do the dynamic broadcast above then
-  // do the static transfer function here. Would be nice if they could
-  // somehow be unified.
-  SmallVector<int64_t, 6> broadcastedStaticShape;
-  OpTrait::util::getBroadcastedShape(lhsType.getShape(), rhsType.getShape(),
-                                     broadcastedStaticShape);
-  auto resultType =
-      RankedTensorType::get(broadcastedStaticShape, lhsType.getElementType());
-  Value lhsBroadcasted = rewriter.create<tcp::BroadcastToOp>(
-      loc, resultType, lhs, broadcastedShape);
-  Value rhsBroadcasted = rewriter.create<tcp::BroadcastToOp>(
-      loc, resultType, rhs, broadcastedShape);
-  Value binaryOpResult;
-  if (isa<tcf::AddOp>(op)) {
-    binaryOpResult = rewriter.create<AddFOp>(loc, result.getType(),
-                                             lhsBroadcasted, rhsBroadcasted);
-  } else if (isa<tcf::MaxOp>(op)) {
-    // XXX: remove TCP dep
-    // XXX: remove TCP ops from TCP
-    auto pred = rewriter.create<CmpFOp>(loc, CmpFPredicate::OGT, lhsBroadcasted,
-                                        rhsBroadcasted);
-    binaryOpResult =
-        rewriter.create<SelectOp>(loc, pred, lhsBroadcasted, rhsBroadcasted);
-  } else if (isa<tcf::MulOp>(op)) {
-    binaryOpResult = rewriter.create<MulFOp>(loc, result.getType(),
-                                             lhsBroadcasted, rhsBroadcasted);
-  } else {
-    op->dump();
-    llvm::report_fatal_error(
-        "unhandled op (see dump above): TCF->Std binary elementwise");
-  }
-  rewriter.create<shape::AssumingYieldOp>(loc, binaryOpResult);
-
-  // Finally, replace with the results of the shape.assuming
-  rewriter.replaceOp(op, assuming.getResults());
-  return success();
-}
-
-namespace {
-template <typename SourceOp>
-class ConvertBinaryElementwise : public OpRewritePattern<SourceOp> {
-public:
-  using OpRewritePattern<SourceOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(SourceOp op,
-                                PatternRewriter &rewriter) const override {
-    return matchAndRewriteBinaryElementwise(op, rewriter);
-  }
-};
-} // namespace
-
-static LogicalResult
-matchAndRewriteUnaryElementwise(Operation *op, PatternRewriter &rewriter) {
-  if (isa<tcf::ExpOp>(op)) {
-    rewriter.replaceOpWithNewOp<math::ExpOp>(op, op->getOperand(0));
-  } else if (isa<tcf::TanhOp>(op)) {
-    rewriter.replaceOpWithNewOp<math::TanhOp>(op, op->getOperand(0));
-  } else {
-    op->dump();
-    llvm::report_fatal_error(
-        "unhandled op (see dump above): TCF->TCP unary elementwise");
-  }
-  return success();
-}
-
-namespace {
-template <typename SourceOp>
-class ConvertUnaryElementwise : public OpRewritePattern<SourceOp> {
-public:
-  using OpRewritePattern<SourceOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(SourceOp op,
-                                PatternRewriter &rewriter) const override {
-    return matchAndRewriteUnaryElementwise(op, rewriter);
-  }
-};
-} // namespace
-
-namespace {
-class ConvertTCFToStd : public ConvertTCFToStdBase<ConvertTCFToStd> {
-public:
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<math::MathDialect, shape::ShapeDialect, tcp::TCPDialect>();
-  }
-
-  void runOnOperation() override {
-    (void)applyPatternsAndFoldGreedily(getOperation(), getPatterns());
-  }
-
-  FrozenRewritePatternSet getPatterns() {
-    MLIRContext *context = &getContext();
-    RewritePatternSet patterns(context);
-    patterns.add<ConvertUnaryElementwise<tcf::ExpOp>,
-                 ConvertUnaryElementwise<tcf::TanhOp>>(context);
-    patterns.add<ConvertBinaryElementwise<tcf::AddOp>,
-                 ConvertBinaryElementwise<tcf::MaxOp>,
-                 ConvertBinaryElementwise<tcf::MulOp>>(context);
-    return std::move(patterns);
-  }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>>
-mlir::NPCOMP::createConvertTCFToStdPass() {
-  return std::make_unique<ConvertTCFToStd>();
-}
--- a/lib/Conversion/TCFToTCP/CMakeLists.txt
+++ b/lib/Conversion/TCFToTCP/CMakeLists.txt
@ -1,20 +0,0 @@
-add_npcomp_conversion_library(NPCOMPTCFToTCP
-  TCFToTCP.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Conversion/TCFToTCP
-
-  DEPENDS
-  NPCOMPConversionPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  MLIRTransforms
-  MLIRShape
-  NPCOMPTCFDialect
-  NPCOMPTCPDialect
-)
--- a/lib/Conversion/TCFToTCP/TCFToTCP.cpp
+++ b/lib/Conversion/TCFToTCP/TCFToTCP.cpp
@ -1,48 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Conversion/TCFToTCP/TCFToTCP.h"
-
-#include "../PassDetail.h"
-#include "mlir/Dialect/Shape/IR/Shape.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Traits.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-
-namespace {
-class ConvertTCFToTCP : public ConvertTCFToTCPBase<ConvertTCFToTCP> {
-public:
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<shape::ShapeDialect, tcp::TCPDialect>();
-  }
-
-  void runOnOperation() override {
-    (void)applyPatternsAndFoldGreedily(getOperation(), getPatterns());
-  }
-
-  FrozenRewritePatternSet getPatterns() {
-    // NOTE: We are keeping this pass around, even though it currently does
-    // nothing, in order to avoid having to reintroduce the same
-    // boilerplate.
-    RewritePatternSet patterns(getOperation().getContext());
-    return std::move(patterns);
-  }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>>
-mlir::NPCOMP::createConvertTCFToTCPPass() {
-  return std::make_unique<ConvertTCFToTCP>();
-}
--- a/lib/Dialect/CMakeLists.txt
+++ b/lib/Dialect/CMakeLists.txt
@ -2,6 +2,4 @@ add_subdirectory(Basicpy)
 add_subdirectory(Numpy)
 add_subdirectory(Refback)
 add_subdirectory(Refbackrt)
-add_subdirectory(TCF)
-add_subdirectory(TCP)
 add_subdirectory(Torch)
--- a/lib/Dialect/TCF/CMakeLists.txt
+++ b/lib/Dialect/TCF/CMakeLists.txt
@ -1,2 +0,0 @@
-add_subdirectory(IR)
-add_subdirectory(Transforms)
--- a/lib/Dialect/TCF/IR/CMakeLists.txt
+++ b/lib/Dialect/TCF/IR/CMakeLists.txt
@ -1,17 +0,0 @@
-add_npcomp_dialect_library(NPCOMPTCFDialect
-  TCFDialect.cpp
-  TCFOps.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Dialect/TCF
-
-  DEPENDS
-  MLIRTCFOpsIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRSupport
-)
--- a/lib/Dialect/TCF/IR/TCFDialect.cpp
+++ b/lib/Dialect/TCF/IR/TCFDialect.cpp
@ -1,22 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCF/IR/TCFDialect.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP::tcf;
-
-#include "npcomp/Dialect/TCF/IR/TCFOpsDialect.cpp.inc"
-
-void TCFDialect::initialize() {
-  addOperations<
-#define GET_OP_LIST
-#include "npcomp/Dialect/TCF/IR/TCFOps.cpp.inc"
-      >();
-}
--- a/lib/Dialect/TCF/IR/TCFOps.cpp
+++ b/lib/Dialect/TCF/IR/TCFOps.cpp
@ -1,15 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP::tcf;
-
-#define GET_OP_CLASSES
-#include "npcomp/Dialect/TCF/IR/TCFOps.cpp.inc"
--- a/lib/Dialect/TCF/Transforms/CMakeLists.txt
+++ b/lib/Dialect/TCF/Transforms/CMakeLists.txt
@ -1,18 +0,0 @@
-add_npcomp_conversion_library(NPCOMPTCFPasses
-  Passes.cpp
-  ShapeRefinement.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Dialect/TCF/Transforms
-
-  DEPENDS
-  NPCOMPTCFPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  NPCOMPTCFDialect
-)
--- a/lib/Dialect/TCF/Transforms/PassDetail.h
+++ b/lib/Dialect/TCF/Transforms/PassDetail.h
@ -1,25 +0,0 @@
-//===- PassDetail.h - Pass details ------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCF_TRANSFORMS_PASSDETAIL_H
-#define NPCOMP_DIALECT_TCF_TRANSFORMS_PASSDETAIL_H
-
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-namespace NPCOMP {
-namespace tcf {
-
-#define GEN_PASS_CLASSES
-#include "npcomp/Dialect/TCF/Transforms/Passes.h.inc"
-
-} // namespace tcf
-} // namespace NPCOMP
-} // end namespace mlir
-
-#endif // NPCOMP_DIALECT_TCF_TRANSFORMS_PASSDETAIL_H
--- a/lib/Dialect/TCF/Transforms/Passes.cpp
+++ b/lib/Dialect/TCF/Transforms/Passes.cpp
@ -1,20 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCF/Transforms/Passes.h"
-
-//===----------------------------------------------------------------------===//
-// Pass registration
-//===----------------------------------------------------------------------===//
-
-namespace {
-#define GEN_PASS_REGISTRATION
-#include "npcomp/Dialect/TCF/Transforms/Passes.h.inc"
-} // end namespace
-
-void mlir::NPCOMP::registerTCFPasses() { ::registerPasses(); }
--- a/lib/Dialect/TCF/Transforms/ShapeRefinement.cpp
+++ b/lib/Dialect/TCF/Transforms/ShapeRefinement.cpp
@ -1,65 +0,0 @@
-//===- ShapeRefinement.cpp - Shape refinement pass ---------------*- C++-*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "PassDetail.h"
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "npcomp/Dialect/TCF/IR/TCFDialect.h"
-#include "npcomp/Dialect/TCF/IR/TCFOps.h"
-#include "npcomp/Dialect/TCF/Transforms/Passes.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-using namespace mlir::NPCOMP::tcf;
-
-namespace {
-
-class ShapeRefinementPass : public TCFShapeRefinementBase<ShapeRefinementPass> {
-  void runOnOperation() override {
-    auto func = getOperation();
-    // TODO: Implement for real.
-    func.walk([](tcf::AddOp addOp) {
-      auto lhsType = addOp.lhs().getType();
-      auto rhsType = addOp.rhs().getType();
-      if (lhsType == rhsType) {
-        addOp.result().setType(lhsType);
-      }
-    });
-
-    // If the change cascaded to any returns, need to update the function
-    // signature.
-    Optional<ReturnOp> firstReturnOp;
-    func.walk([&](ReturnOp returnOp) {
-      if (!firstReturnOp) {
-        firstReturnOp = returnOp;
-      } else {
-        if (returnOp.getOperandTypes() != firstReturnOp->getOperandTypes()) {
-          returnOp.emitError() << "after refining shapes, different "
-                                  "terminators have different types";
-          signalPassFailure();
-        }
-      }
-    });
-
-    assert(firstReturnOp && "function lacks a terminator");
-    auto funcType = func.getType();
-    SmallVector<Type, 4> resultTypes(firstReturnOp->getOperandTypes().begin(),
-                                     firstReturnOp->getOperandTypes().end());
-    func.setType(FunctionType::get(funcType.getContext(), funcType.getInputs(),
-                                   resultTypes));
-  }
-};
-
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>>
-mlir::NPCOMP::tcf::createShapeRefinementPass() {
-  return std::make_unique<ShapeRefinementPass>();
-}
--- a/lib/Dialect/TCP/CMakeLists.txt
+++ b/lib/Dialect/TCP/CMakeLists.txt
@ -1,2 +0,0 @@
-add_subdirectory(IR)
-add_subdirectory(Transforms)
--- a/lib/Dialect/TCP/IR/CMakeLists.txt
+++ b/lib/Dialect/TCP/IR/CMakeLists.txt
@ -1,19 +0,0 @@
-add_npcomp_dialect_library(NPCOMPTCPDialect
-  TCPDialect.cpp
-  TCPOps.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Dialect/TCP
-
-  DEPENDS
-  MLIRTCPOpsIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRSupport
-  MLIRSideEffectInterfaces
-  MLIRShape
-  )
--- a/lib/Dialect/TCP/IR/TCPDialect.cpp
+++ b/lib/Dialect/TCP/IR/TCPDialect.cpp
@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "mlir/Transforms/InliningUtils.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP::tcp;
-
-#include "npcomp/Dialect/TCP/IR/TCPOpsDialect.cpp.inc"
-
-//===----------------------------------------------------------------------===//
-// TCPDialect Dialect Interfaces
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct TCPInlinerInterface : public DialectInlinerInterface {
-  using DialectInlinerInterface::DialectInlinerInterface;
-  bool isLegalToInline(Region *dest, Region *src, bool wouldBeCloned,
-                       BlockAndValueMapping &valueMapping) const final {
-    return true;
-  }
-  bool isLegalToInline(Operation *, Region *, bool wouldBeCloned,
-                       BlockAndValueMapping &) const final {
-    return true;
-  }
-};
-} // end anonymous namespace
-
-void TCPDialect::initialize() {
-  addOperations<
-#define GET_OP_LIST
-#include "npcomp/Dialect/TCP/IR/TCPOps.cpp.inc"
-      >();
-  addInterfaces<TCPInlinerInterface>();
-}
--- a/lib/Dialect/TCP/IR/TCPOps.cpp
+++ b/lib/Dialect/TCP/IR/TCPOps.cpp
@ -1,19 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-#include "mlir/Dialect/Shape/IR/Shape.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "llvm/ADT/STLExtras.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-using namespace mlir::NPCOMP::tcp;
-
-#define GET_OP_CLASSES
-#include "npcomp/Dialect/TCP/IR/TCPOps.cpp.inc"
--- a/lib/Dialect/TCP/Transforms/Bufferize.cpp
+++ b/lib/Dialect/TCP/Transforms/Bufferize.cpp
@ -1,270 +0,0 @@
-//===- Bufferize.cpp - Bufferization for TCP dialect -------------*- C++-*-===//
-//
-// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "PassDetail.h"
-
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Transforms/Bufferize.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "npcomp/Dialect/Refback/IR/RefbackDialect.h"
-#include "npcomp/Dialect/Refback/IR/RefbackOps.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-#include "npcomp/Dialect/TCP/Transforms/Passes.h"
-
-using namespace mlir;
-using namespace mlir::NPCOMP;
-
-// TODO: Don't just open-code all shape transfer functions here.
-static SmallVector<Value, 6> bypassResultShapes(Operation &op) {
-  OpBuilder builder(&op);
-
-  if (auto broadcastTo = dyn_cast<tcp::BroadcastToOp>(op)) {
-    return {broadcastTo.shape()};
-  }
-
-  if (auto splatted = dyn_cast<tcp::SplattedOp>(op)) {
-    return {splatted.shape()};
-  }
-
-  if (auto pad = dyn_cast<tcp::PadOp>(op)) {
-    SmallVector<Value, 6> outDims;
-    auto inputType = pad.operand().getType().cast<RankedTensorType>();
-    for (int i = 0, e = inputType.getRank(); i < e; i++) {
-      auto dimIndex = builder.create<ConstantIndexOp>(op.getLoc(), i);
-      auto lowerExpansion =
-        builder.create<tensor::ExtractOp>(op.getLoc(), pad.lowerExpansion(),
-            ValueRange({dimIndex}));
-      auto upperExpansion =
-        builder.create<tensor::ExtractOp>(op.getLoc(), pad.upperExpansion(),
-            ValueRange({dimIndex}));
-      auto operandDim =
-          builder.create<tensor::DimOp>(op.getLoc(), pad.operand(), i);
-      auto totalExpansion =
-        builder.create<AddIOp>(op.getLoc(), lowerExpansion, upperExpansion);
-      auto outDim =
-        builder.create<AddIOp>(op.getLoc(), totalExpansion, operandDim);
-      outDims.push_back(outDim);
-    }
-    Value outDimTensor = builder.create<tensor::FromElementsOp>(op.getLoc(), ValueRange(outDims));
-    return {outDimTensor};
-  }
-
-  // No shape transfer function.
-  return {};
-}
-
-static FailureOr<SmallVector<Value, 6>>
-allocateResults(Operation *op, ConversionPatternRewriter &rewriter,
-                Location loc,
-                SmallVectorImpl<Value> *resultShapesOut = nullptr) {
-  auto resultShapes = bypassResultShapes(*op);
-  SmallVector<Value, 6> results;
-  for (auto t : llvm::zip(op->getResults(), resultShapes)) {
-    auto result = std::get<0>(t);
-    auto resultShape = std::get<1>(t);
-    auto tensorType = result.getType().cast<RankedTensorType>();
-    auto memrefType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
-    auto memref =
-        rewriter.create<refback::AllocMemRefOp>(loc, memrefType, resultShape);
-    results.push_back(memref);
-  }
-  if (resultShapesOut)
-    resultShapesOut->append(resultShapes.begin(), resultShapes.end());
-  return results;
-}
-
-namespace {
-// TODO: Lower to a "buffer version" of tcp::BroadcastTo instead of directly to
-// loops.
-class LowerBroadcastToToLoopsPattern
-    : public OpConversionPattern<tcp::BroadcastToOp> {
-public:
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tcp::BroadcastToOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto resultType = op.getType().cast<RankedTensorType>();
-    auto inputType = op.operand().getType().cast<RankedTensorType>();
-    SmallVector<Value, 6> resultShapes;
-    auto resultsOrFailure =
-        allocateResults(op, rewriter, op.getLoc(), &resultShapes);
-    if (failed(resultsOrFailure))
-      return failure();
-    Value resultMemref = (*resultsOrFailure)[0];
-    auto resultShape = resultShapes[0];
-    Value inputMemref = operands[0];
-
-    SmallVector<Value, 6> outputExtents;
-    for (int i = 0, e = resultType.getRank(); i < e; i++) {
-      Value dimIndex = rewriter.create<ConstantIndexOp>(op.getLoc(), i);
-      Value outputExtent = rewriter.create<tensor::ExtractOp>(
-          op.getLoc(), resultShape, ValueRange({dimIndex}));
-      outputExtents.push_back(outputExtent);
-    }
-    int rankDiff = resultType.getRank() - inputType.getRank();
-    SmallVector<Value, 6> inputDimRequiresBroadcasting;
-    for (int i = 0, e = inputType.getRank(); i < e; i++) {
-      // Calculate the relevant extents.
-      Value inputExtent =
-          rewriter.create<tensor::DimOp>(op.getLoc(), op.operand(), i);
-      inputDimRequiresBroadcasting.push_back(
-          rewriter.create<CmpIOp>(op.getLoc(), CmpIPredicate::ne, inputExtent,
-                                  outputExtents[rankDiff + i]));
-    }
-
-    {
-      OpBuilder::InsertionGuard guard(rewriter);
-      Value c0 = rewriter.create<ConstantIndexOp>(op.getLoc(), 0);
-      Value c1 = rewriter.create<ConstantIndexOp>(op.getLoc(), 1);
-
-      SmallVector<Value, 6> inductionVariables;
-      // Create the (perfectly nested) loops.
-      // Loop invariant: At the start of iteration `i`, the rewriter insertion
-      // point is inside `i` nested loops.
-      for (int i = 0, e = resultType.getRank(); i < e; i++) {
-        auto loop = rewriter.create<scf::ForOp>(
-            op.getLoc(), c0, outputExtents[i], c1, ValueRange({}));
-        Block *body = loop.getBody();
-        inductionVariables.push_back(body->getArgument(0));
-        // Leave the insertion point at the beginning of the body.
-        rewriter.setInsertionPointToStart(body);
-      }
-
-      // Create the inner loop body.
-      // When reading from the input, clamp any indices for dimensions that are
-      // being broadcast.
-      SmallVector<Value, 6> inputIndices;
-      for (int i = 0, e = inputType.getRank(); i < e; i++) {
-        auto c0 = rewriter.create<ConstantIndexOp>(op.getLoc(), 0);
-        auto select = rewriter.create<SelectOp>(
-            op.getLoc(), inputDimRequiresBroadcasting[i], c0,
-            inductionVariables[rankDiff + i]);
-        inputIndices.push_back(select);
-      }
-      Value load = rewriter.create<memref::LoadOp>(op.getLoc(), inputMemref,
-                                                   inputIndices);
-      rewriter.create<memref::StoreOp>(op.getLoc(), load, resultMemref,
-                                       inductionVariables);
-    }
-    rewriter.replaceOp(op, resultMemref);
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-class BufferizeSplattedOp : public OpConversionPattern<tcp::SplattedOp> {
-public:
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tcp::SplattedOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto resultsOrFailure = allocateResults(op, rewriter, op.getLoc());
-    if (failed(resultsOrFailure))
-      return failure();
-    auto results = *resultsOrFailure;
-    rewriter.create<linalg::FillOp>(op.getLoc(), op.splatVal(), results[0]);
-    rewriter.replaceOp(op, results);
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-class BufferizePadOp : public OpConversionPattern<tcp::PadOp> {
-public:
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tcp::PadOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto resultsOrFailure = allocateResults(op, rewriter, op.getLoc());
-    if (failed(resultsOrFailure))
-      return failure();
-    auto results = *resultsOrFailure;
-    auto c1 =
-      rewriter.create<ConstantOp>(op.getLoc(), rewriter.getIntegerAttr(
-            rewriter.getIndexType(), 1));
-    SmallVector<Value, 6> offsets, sizes, strides;
-    auto resultType = op.getType().cast<RankedTensorType>();
-    for (int i = 0, e = resultType.getRank(); i < e; i++) {
-      auto dimIndex = rewriter.create<ConstantIndexOp>(op.getLoc(), i);
-      auto offset =
-        rewriter.create<tensor::ExtractOp>(op.getLoc(), op.lowerExpansion(),
-            ValueRange({dimIndex}));
-      auto size = rewriter.create<tensor::DimOp>(op.getLoc(), op.operand(), i);
-      auto stride   = c1;
-      offsets.push_back(offset);
-      sizes.push_back(size);
-      strides.push_back(stride);
-    }
-    rewriter.create<linalg::FillOp>(op.getLoc(), op.fillVal(), results[0]);
-    auto unpadded = rewriter.create<memref::SubViewOp>(
-        op.getLoc(), results[0], ValueRange(offsets), ValueRange(sizes),
-        ValueRange(strides));
-    auto inputMemref = operands[0];
-    rewriter.create<linalg::CopyOp>(op.getLoc(), inputMemref, unpadded);
-    rewriter.replaceOp(op, results);
-    return success();
-  }
-};
-} // namespace
-
-namespace {
-class TCPBufferizePass : public TCPBufferizeBase<TCPBufferizePass> {
-  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
-    registry.insert<refback::RefbackDialect>();
-    registry.insert<memref::MemRefDialect>();
-    registry.insert<linalg::LinalgDialect>();
-    registry.insert<scf::SCFDialect>();
-  }
-
-  void runOnOperation() override {
-    auto func = getOperation();
-    auto *context = &getContext();
-
-    BufferizeTypeConverter typeConverter;
-
-    RewritePatternSet patterns(context);
-
-    ConversionTarget target(*context);
-
-    // All lowering to buffers involves refback.alloc_memref ops.
-    // TODO: This makes the tests cleaner, but otherwise isn't too essential as
-    // we can just open-code the extents for the alloc.
-    target.addLegalOp<refback::AllocMemRefOp>();
-
-    patterns.add<LowerBroadcastToToLoopsPattern>(typeConverter, context);
-    target.addIllegalOp<tcp::BroadcastToOp>();
-    patterns.add<BufferizeSplattedOp>(typeConverter, context);
-    target.addIllegalOp<tcp::SplattedOp>();
-    patterns.add<BufferizePadOp>(typeConverter, context);
-    target.addIllegalOp<tcp::PadOp>();
-
-    target.addLegalDialect<linalg::LinalgDialect>();
-    target.addLegalDialect<StandardOpsDialect>();
-    target.addLegalDialect<scf::SCFDialect>();
-    target.addLegalDialect<tensor::TensorDialect>();
-    target.addLegalDialect<memref::MemRefDialect>();
-
-    if (failed(applyPartialConversion(func, target, std::move(patterns))))
-      return signalPassFailure();
-  }
-};
-} // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> mlir::NPCOMP::createTCPBufferizePass() {
-  return std::make_unique<TCPBufferizePass>();
-}
--- a/lib/Dialect/TCP/Transforms/CMakeLists.txt
+++ b/lib/Dialect/TCP/Transforms/CMakeLists.txt
@ -1,18 +0,0 @@
-add_npcomp_conversion_library(NPCOMPTCPPasses
-  Passes.cpp
-  Bufferize.cpp
-
-  ADDITIONAL_HEADER_DIRS
-  ${PROJECT_SOURCE_DIR}/include/npcomp/Dialect/TCP/Transforms
-
-  DEPENDS
-  NPCOMPTCPPassIncGen
-
-  LINK_COMPONENTS
-  Core
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRPass
-  NPCOMPTCPDialect
-)
--- a/lib/Dialect/TCP/Transforms/PassDetail.h
+++ b/lib/Dialect/TCP/Transforms/PassDetail.h
@ -1,23 +0,0 @@
-//===- PassDetail.h - Pass details ------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_DIALECT_TCP_TRANSFORMS_PASSDETAIL_H
-#define NPCOMP_DIALECT_TCP_TRANSFORMS_PASSDETAIL_H
-
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-namespace NPCOMP {
-
-#define GEN_PASS_CLASSES
-#include "npcomp/Dialect/TCP/Transforms/Passes.h.inc"
-
-} // namespace NPCOMP
-} // end namespace mlir
-
-#endif // NPCOMP_DIALECT_TCP_TRANSFORMS_PASSDETAIL_H
--- a/lib/Dialect/TCP/Transforms/Passes.cpp
+++ b/lib/Dialect/TCP/Transforms/Passes.cpp
@ -1,20 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "npcomp/Dialect/TCP/Transforms/Passes.h"
-
-//===----------------------------------------------------------------------===//
-// Pass registration
-//===----------------------------------------------------------------------===//
-
-namespace {
-#define GEN_PASS_REGISTRATION
-#include "npcomp/Dialect/TCP/Transforms/Passes.h.inc"
-} // end namespace
-
-void mlir::NPCOMP::registerTCPPasses() { ::registerPasses(); }
--- a/lib/InitAll.cpp
+++ b/lib/InitAll.cpp
@ -18,10 +18,6 @@
 #include "npcomp/Dialect/Numpy/Transforms/Passes.h"
 #include "npcomp/Dialect/Refback/IR/RefbackDialect.h"
 #include "npcomp/Dialect/Refbackrt/IR/RefbackrtDialect.h"
-#include "npcomp/Dialect/TCF/IR/TCFDialect.h"
-#include "npcomp/Dialect/TCF/Transforms/Passes.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/Transforms/Passes.h"
 #include "npcomp/Dialect/Torch/IR/TorchDialect.h"
 #include "npcomp/Dialect/Torch/Transforms/Passes.h"
 #include "npcomp/RefBackend/RefBackend.h"
@ -33,8 +29,6 @@ void mlir::NPCOMP::registerAllDialects(mlir::DialectRegistry &registry) {
                  Numpy::NumpyDialect,
                  refbackrt::RefbackrtDialect,
                  refback::RefbackDialect,
-                  tcf::TCFDialect,
-                  tcp::TCPDialect,
                  mlir::NPCOMP::Torch::TorchDialect>();
  // clang-format on
 }
@ -44,8 +38,6 @@ void mlir::NPCOMP::registerAllPasses() {
  mlir::NPCOMP::registerConversionPasses();
  mlir::NPCOMP::registerBasicpyPasses();
  mlir::NPCOMP::registerNumpyPasses();
-  mlir::NPCOMP::registerTCFPasses();
-  mlir::NPCOMP::registerTCPPasses();
  mlir::NPCOMP::registerTorchPasses();
  mlir::NPCOMP::registerTypingPasses();
  mlir::NPCOMP::IREEBackend::registerIREEBackendPasses();
--- a/lib/RefBackend/JITHelpers/JITModule.cpp
+++ b/lib/RefBackend/JITHelpers/JITModule.cpp
@ -33,7 +33,7 @@ void JITModule::buildBackendCompilationPipeline(PassManager &pm,
                                                bool optimize) {
  NPCOMP::RefBackendLoweringPipelineOptions options;
  options.optimize = optimize;
-  NPCOMP::createTCFRefBackendLoweringPipeline(pm, options);
+  NPCOMP::createRefBackendLoweringPipeline(pm, options);
 }

 llvm::Expected<std::unique_ptr<JITModule>>
--- a/lib/RefBackend/RefBackend.cpp
+++ b/lib/RefBackend/RefBackend.cpp
@ -8,18 +8,13 @@
 //
 // This is the base file for npcomp's "reference backend".
 //
-// The input to this backend is a layer that we call "TCP" + a mix of scalar
-// ops. TCP is currently a concrete dialect, but more generally it refers to a
-// layer of the compilation stack consisting of named ops on entire tensors,
-// with their preconditions checked. For example, a "matmul" op that assumes
-// that the contracting ("k") dimensions of both operands are equal. Earlier
-// code in the compilation stack should ensure that these preconditions are met
-// (such as during TCF->TCP lowering).
+// The input to this backend is a layer that consists of linalg-on-tensors
+// together with std scalar ops and control flow.
 //
 // The output of this backend is LLVM IR suitable for JITing.
 //
 // We expect that other backends will appear that have a similar kind of
-// interface (TCP + scalar ops ---> LLVM IR / other "executable").
+// interface. IREE already uses this layering.
 //
 //===----------------------------------------------------------------------===//

@ -43,13 +38,7 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/Passes.h"
-#include "npcomp/Conversion/TCFToLinalg/TCFToLinalg.h"
-#include "npcomp/Conversion/TCFToStd/TCFToStd.h"
-#include "npcomp/Conversion/TCFToTCP/TCFToTCP.h"
 #include "npcomp/Dialect/Refback/IR/RefbackOps.h"
-#include "npcomp/Dialect/TCP/IR/TCPDialect.h"
-#include "npcomp/Dialect/TCP/IR/TCPOps.h"
-#include "npcomp/Dialect/TCP/Transforms/Passes.h"

 using namespace mlir;
 using namespace mlir::NPCOMP;
@ -69,18 +58,6 @@ void mlir::NPCOMP::registerRefBackendPasses() {
  mlir::PassPipelineRegistration<RefBackendLoweringPipelineOptions>(
      "refback-lowering-pipeline", "RefBackend lowering pipeline.",
      mlir::NPCOMP::createRefBackendLoweringPipeline);
-  // TODO: Move this out of RefBackend once the TCF->TCP conversions
-  // become more substantial.
-  mlir::PassPipelineRegistration<RefBackendLoweringPipelineOptions>(
-      "refback-tcf-to-tcp-pipeline",
-      "RefBackend lowering pipeline converting TCF ops to TCP-level ops (not "
-      "just TCP dialect).",
-      mlir::NPCOMP::createRefBackendTCFToTCPPipeline);
-  mlir::PassPipelineRegistration<RefBackendLoweringPipelineOptions>(
-      "tcf-refback-lowering-pipeline",
-      "RefBackend lowering pipeline, starting from TCF. (equivalent to "
-      "refback-tcf-to-tcp-pipeline + refback-lowering-pipeline)",
-      mlir::NPCOMP::createTCFRefBackendLoweringPipeline);
 }

 //===----------------------------------------------------------------------===//
@ -245,7 +222,6 @@ void mlir::NPCOMP::createRefBackendLoweringPipeline(
  // so we try to bracket the entire bufferization pipeline with the module
  // passes to allow maximum parallelism.
  pm.addPass(createTensorConstantBufferizePass());
-  pm.addNestedPass<FuncOp>(createTCPBufferizePass());
  // refback::AllocMemRefOp takes a shape (i.e. extent tensor) as an argument.
  // We need to resolve this to std.alloc which takes individual extents.
  pm.addNestedPass<FuncOp>(createLowerAllocMemRefOpsPass());
@ -308,31 +284,3 @@ void mlir::NPCOMP::createRefBackendLoweringPipeline(
    pm.addNestedPass<FuncOp>(createCSEPass());
  }
 }
-
-void mlir::NPCOMP::createRefBackendTCFToTCPPipeline(
-    OpPassManager &pm, const RefBackendLoweringPipelineOptions &options) {
-  // Convert from TCF dialect to TCP-level ops.
-  //
-  // TCF has implicit broadcasting, and issues errors "inside the ops" in the
-  // case of invalid broadcasts.
-  //
-  // TCP-level ops do not. So we need to reify the broadcasting and error
-  // checking.
-  //
-  // Note that TCP-level ops includes ops outside the TCP dialect itself, such
-  // as std elementwise ops on tensors and linalg ops on tensors.
-  pm.addNestedPass<FuncOp>(createConvertTCFToStdPass());
-  pm.addNestedPass<FuncOp>(createConvertTCFToLinalgPass());
-  pm.addNestedPass<FuncOp>(createConvertTCFToTCPPass());
-
-  if (options.optimize) {
-    pm.addNestedPass<FuncOp>(createCanonicalizerPass());
-    pm.addNestedPass<FuncOp>(createCSEPass());
-  }
-}
-
-void mlir::NPCOMP::createTCFRefBackendLoweringPipeline(
-    OpPassManager &pm, const RefBackendLoweringPipelineOptions &options) {
-  createRefBackendTCFToTCPPipeline(pm, options);
-  createRefBackendLoweringPipeline(pm, options);
-}
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@ -95,13 +95,6 @@ declare_mlir_dialect_python_bindings(
  SOURCES npcomp/dialects/numpy.py
  DIALECT_NAME numpy)

-declare_mlir_dialect_python_bindings(
-  ADD_TO_PARENT NPCOMPPythonSources.Dialects
-  ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}"
-  TD_FILE npcomp/dialects/TCFBind.td
-  SOURCES npcomp/dialects/tcf.py
-  DIALECT_NAME tcf)
-
 declare_mlir_dialect_python_bindings(
  ADD_TO_PARENT NPCOMPPythonSources.Dialects
  ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}"
@ -157,4 +150,3 @@ add_mlir_python_modules(NPCOMPPythonModules
  COMMON_CAPI_LINK_LIBS
    NPCOMPPythonCAPI
 )
-
--- a/python/npcomp/compiler/generic/backend/refjit.py
+++ b/python/npcomp/compiler/generic/backend/refjit.py
@ -10,7 +10,6 @@ _refjit = None
 BACKEND_PASSES = (
    "builtin.func(convert-scf-to-std)",
    "builtin.func(canonicalize)",
-    "builtin.func(tcf-shape-refinement)",
 )


--- a/python/npcomp/compiler/numpy/backend/refjit.py
+++ b/python/npcomp/compiler/numpy/backend/refjit.py
@ -17,10 +17,8 @@ __all__ = [
 FRONTEND_PASSES = (
    "builtin.func(npcomp-cpa-type-inference)",
    "numpy-public-functions-to-tensor",
-    "builtin.func(convert-numpy-to-tcf)",
    "builtin.func(convert-scf-to-std)",
    "builtin.func(canonicalize)",
-    "builtin.func(tcf-shape-refinement)",
 )

 # Re-export.
--- a/python/npcomp/compiler/pytorch/backend/frontend_lowering.py
+++ b/python/npcomp/compiler/pytorch/backend/frontend_lowering.py
@ -30,11 +30,11 @@ def lower_module(imported_module: Module):
        # Frontend.
        pipeline_str = "torch-globalized-module-to-npcomp-backend-pipeline"
        if logging.debug_enabled():
-            logging.debug("Running Torch->TCP pipeline '{}'", pipeline_str)
+            logging.debug("Running Torch->backend pipeline '{}'", pipeline_str)
        pm = PassManager.parse(pipeline_str)
        pm.run(imported_module)
        if logging.debug_enabled():
-            logging.debug("TCP IR:\n{}", imported_module)
+            logging.debug("Backend IR:\n{}", imported_module)
    return imported_module

 def lower_object_graph(imported_module: Module):
--- a/python/npcomp/compiler/pytorch/backend/refjit.py
+++ b/python/npcomp/compiler/pytorch/backend/refjit.py
@ -45,7 +45,7 @@ class RefjitNpcompBackend(NpcompBackend):

  def compile(self, imported_module: Module):
    """Compiles an imported module, with a flat list of functions.
-    The module is expected to be in "TCP + scalar code" form.
+    The module is expected to be in linalg-on-tensors + scalar code form.
    TODO: More clearly define the backend contract. Generally this will
    extend to support globals, lists, and other stuff.

--- a/python/npcomp/dialects/TCFBind.td
+++ b/python/npcomp/dialects/TCFBind.td
@ -1,15 +0,0 @@
-//===-- TCFBind.td - TCF dialect bind ----------------------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NPCOMP_PYTHON_DIALECTS_TCF_BIND
-#define NPCOMP_PYTHON_DIALECTS_TCF_BIND
-
-include "mlir/Bindings/Python/Attributes.td"
-include "npcomp/Dialect/TCF/IR/TCFOps.td"
-
-#endif
--- a/python/npcomp/dialects/tcf.py
+++ b/python/npcomp/dialects/tcf.py
@ -1,5 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-from ._tcf_ops_gen import *
--- a/test/Conversion/NumpyToTCF/builtin_ufunc_call.mlir
+++ b/test/Conversion/NumpyToTCF/builtin_ufunc_call.mlir
@ -1,25 +0,0 @@
-// RUN: npcomp-opt <%s -convert-numpy-to-tcf | FileCheck %s --dump-input=fail
-
-
-// CHECK-LABEL: func @unknownBuiltinUfunc
-func @unknownBuiltinUfunc(%arg0: tensor<?xf32>, %arg1: tensor<?x?xf32>) -> tensor<*xf32> {
-  // CHECK: numpy.builtin_ufunc_call
-  // CHECK-NOT: tcf.add
-  %0 = numpy.builtin_ufunc_call<"NON_EXISTING"> (%arg0, %arg1) : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<*xf32>
-  return %0 : tensor<*xf32>
-}
-
-// CHECK-LABEL: func @illagalTernary
-func @illagalTernary(%arg0: tensor<?xf32>, %arg1: tensor<?x?xf32>) -> tensor<*xf32> {
-  // CHECK: numpy.builtin_ufunc_call
-  // CHECK-NOT: tcf.add
-  %0 = numpy.builtin_ufunc_call<"numpy.add"> (%arg0, %arg1, %arg0) : (tensor<?xf32>, tensor<?x?xf32>, tensor<?xf32>) -> tensor<*xf32>
-  return %0 : tensor<*xf32>
-}
-
-// CHECK-LABEL: func @numpyAdd
-func @numpyAdd(%arg0: tensor<?xf32>, %arg1: tensor<?x?xf32>) -> tensor<*xf32> {
-  // CHECK: tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<*xf32>
-  %0 = numpy.builtin_ufunc_call<"numpy.add"> (%arg0, %arg1) : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<*xf32>
-  return %0 : tensor<*xf32>
-}
--- a/test/Conversion/TCFToLinalg/basic.mlir
+++ b/test/Conversion/TCFToLinalg/basic.mlir
@ -1,72 +0,0 @@
-// RUN: npcomp-opt <%s -convert-tcf-to-linalg | FileCheck %s --dump-input=fail
-
-// CHECK-LABEL:   func @tcf_matmul(
-// CHECK-SAME:                     %[[LHS:.*]]: tensor<?x?xf32>,
-// CHECK-SAME:                     %[[RHS:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
-// CHECK:           %[[C0F32:.*]] = constant 0.000000e+00 : f32
-// CHECK:           %[[C0:.*]] = constant 0 : index
-// CHECK:           %[[C1:.*]] = constant 1 : index
-// CHECK:           %[[LHSK:.*]] = tensor.dim %[[LHS]], %[[C1]] : tensor<?x?xf32>
-// CHECK:           %[[RHSK:.*]] = tensor.dim %[[RHS]], %[[C0]] : tensor<?x?xf32>
-// CHECK:           %[[KEQUAL:.*]] = cmpi eq, %[[LHSK]], %[[RHSK]] : index
-// CHECK:           %[[WINESS:.*]] = shape.cstr_require %[[KEQUAL]], "mismatching contracting dimension for matmul"
-// CHECK:           %[[RET:.*]] = shape.assuming %[[WINESS]] -> (tensor<?x?xf32>) {
-// CHECK:             %[[LHSROWS:.*]] = tensor.dim %[[LHS]], %[[C0]] : tensor<?x?xf32>
-// CHECK:             %[[RHSCOLS:.*]] = tensor.dim %[[RHS]], %[[C1]] : tensor<?x?xf32>
-// CHECK:             %[[SHAPE:.*]] = tensor.from_elements %[[LHSROWS]], %[[RHSCOLS]] : tensor<2xindex>
-// CHECK:             %[[INIT_TENSOR:.*]] = tcp.splatted %[[C0F32]], %[[SHAPE]] : (f32, tensor<2xindex>) -> tensor<?x?xf32>
-// CHECK:             %[[MATMUL:.*]] = linalg.matmul ins(%[[LHS]], %[[RHS]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[INIT_TENSOR]] : tensor<?x?xf32>)  -> tensor<?x?xf32>
-// CHECK:             shape.assuming_yield %[[MATMUL]] : tensor<?x?xf32>
-// CHECK:           }
-// CHECK:           return %[[RET:.*]] : tensor<?x?xf32>
-func @tcf_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL:   func @tcf_conv_2d_nchw(
-// CHECK-SAME:                     %[[IN:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
-// CHECK-SAME:                     %[[FILTER:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
-// CHECK:           %[[C0F32:.*]] = constant 0.000000e+00 : f32
-// CHECK:           %[[C1:.*]] = constant 1 : index
-// CHECK:           %[[C0:.*]] = constant 0 : index
-// CHECK:           %[[C2:.*]] = constant 2 : index
-// CHECK:           %[[C3:.*]] = constant 3 : index
-// CHECK:           %[[CHANNELS:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[HEIGHT:.*]] = tensor.dim %[[IN]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[WIDTH:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[FILTERCHANNELS:.*]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[FILTERHEIGHT:.*]] = tensor.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[FILTERWIDTH:.*]] = tensor.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK:           %[[CMPCHANNELS:.*]] = cmpi eq, %[[CHANNELS]], %[[FILTERCHANNELS]] : index
-// CHECK:           %[[CMPHEIGHT:.*]] = cmpi uge, %[[HEIGHT]], %[[FILTERHEIGHT]] : index
-// CHECK:           %[[CMPWIDTH:.*]] = cmpi uge, %[[WIDTH]], %[[FILTERWIDTH]] : index
-// CHECK:           %[[CSTRCHANNELS:.*]] = shape.cstr_require %[[CMPCHANNELS]], "input and filter in-channels must be equal"
-// CHECK:           %[[CSTRHEIGHT:.*]] = shape.cstr_require %[[CMPHEIGHT]], "input height must be greater than or equal to filter KH-dimension"
-// CHECK:           %[[CSTRWIDTH:.*]] = shape.cstr_require %[[CMPWIDTH]], "input width must be greater than or equal to filter KW-dimension"
-// CHECK:           %[[WITNESS:.*]] = shape.assuming_all %[[CSTRCHANNELS]], %[[CSTRHEIGHT]], %[[CSTRWIDTH]]
-// CHECK:           %[[RET:.*]] = shape.assuming %[[WITNESS]] -> (tensor<?x?x?x?xf32>) {
-// CHECK:             %[[BATCH:.*]] = tensor.dim %[[IN]], %[[C0]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[HEIGHT:.*]] = tensor.dim %[[IN]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[WIDTH:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[OUTCHANNELS:.*]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[FILTERHEIGHT:.*]] = tensor.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[FILTERWIDTH:.*]] = tensor.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK:             %[[FILTERHEIGHTM1:.*]] = subi %[[FILTERHEIGHT]], %[[C1]] : index
-// CHECK:             %[[HEIGHTV0:.*]] = subi %[[HEIGHT]], %[[FILTERHEIGHTM1]] : index
-// CHECK:             %[[HEIGHTV0M1:.*]] = subi %[[HEIGHTV0]], %[[C1]] : index
-// CHECK:             %[[OUTHEIGHT:.*]] = addi %[[HEIGHTV0M1]], %[[C1]] : index
-// CHECK:             %[[FILTERWIDTHM1:.*]] = subi %[[FILTERWIDTH]], %[[C1]] : index
-// CHECK:             %[[WIDTHV0:.*]] = subi %[[WIDTH]], %[[FILTERWIDTHM1]] : index
-// CHECK:             %[[WIDTHV0M1:.*]] = subi %[[WIDTHV0]], %[[C1]] : index
-// CHECK:             %[[OUTWIDTH:.*]] = addi %[[WIDTHV0M1]], %[[C1]] : index
-// CHECK:             %[[SHAPE:.*]] = tensor.from_elements %[[BATCH]], %[[OUTCHANNELS]], %[[OUTHEIGHT]], %[[OUTWIDTH]] : tensor<4xindex>
-// CHECK:             %[[INIT_TENSOR:.*]] = tcp.splatted %[[C0F32]], %[[SHAPE]] : (f32, tensor<4xindex>) -> tensor<?x?x?x?xf32>
-// CHECK:             %[[CONVNCHW:.*]] = linalg.conv_2d_nchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[IN]], %[[FILTER]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) outs(%[[INIT_TENSOR]] : tensor<?x?x?x?xf32>)  -> tensor<?x?x?x?xf32>
-// CHECK:             shape.assuming_yield %[[CONVNCHW]] : tensor<?x?x?x?xf32>
-// CHECK:           }
-// CHECK:           return %[[RET:.*]] : tensor<?x?x?x?xf32>
-func @tcf_conv_2d_nchw(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
-  %0 = tcf.conv_2d_nchw %arg0, %arg1 : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
-  return %0 : tensor<?x?x?x?xf32>
-}
--- a/test/Conversion/TCFToStd/basic.mlir
+++ b/test/Conversion/TCFToStd/basic.mlir
@ -1,31 +0,0 @@
-// RUN: npcomp-opt <%s -convert-tcf-to-std | FileCheck %s
-
-// CHECK-LABEL:   func @unary_ops(
-// CHECK-SAME:                    %[[ARG:.*]]: tensor<?xf32>) -> tensor<?xf32> {
-// CHECK:           %[[RET:.*]] = math.exp %[[ARG]] : tensor<?xf32>
-// CHECK:           return %[[RET]] : tensor<?xf32>
-// CHECK:         }
-func @unary_ops(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.exp %arg0 : tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
-// CHECK-LABEL:   func @tcf_add(
-// CHECK-SAME:            %[[LHS:.*]]: tensor<?xf32>,
-// CHECK-SAME:            %[[RHS:.*]]: tensor<?xf32>) -> tensor<?xf32> {
-// CHECK:           %[[LHSSHAPE:.*]] = shape.shape_of %[[LHS]]
-// CHECK:           %[[RHSSHAPE:.*]] = shape.shape_of %[[RHS]]
-// CHECK:           %[[WITNESS:.*]] = shape.cstr_broadcastable %[[LHSSHAPE]], %[[RHSSHAPE]]
-// CHECK:           %[[RET:.*]] = shape.assuming %[[WITNESS]] -> (tensor<?xf32>) {
-// CHECK:             %[[RESULTSHAPE:.*]] = shape.broadcast %[[LHSSHAPE]], %[[RHSSHAPE]]
-// CHECK:             %[[LHSBCAST:.*]] = tcp.broadcast_to %[[LHS]], %[[RESULTSHAPE]]
-// CHECK:             %[[RHSBCAST:.*]] = tcp.broadcast_to %[[RHS]], %[[RESULTSHAPE]]
-// CHECK:             %[[ADD:.*]] = addf %[[LHSBCAST]], %[[RHSBCAST]]
-// CHECK:             shape.assuming_yield %[[ADD]] : tensor<?xf32>
-// CHECK:           }
-// CHECK:           return %[[RET:.*]] : tensor<?xf32>
-// CHECK:         }
-func @tcf_add(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
--- a/test/Conversion/TCFToTCP/basic.mlir
+++ b/test/Conversion/TCFToTCP/basic.mlir
@ -1,10 +0,0 @@
-// RUN: npcomp-opt <%s -convert-tcf-to-tcp | FileCheck %s
-
-// NOTE: We are keeping this pass around, even though it currently does
-// nothing, in order to avoid having to reintroduce the same
-// boilerplate.
-
-// CHECK: @f
-func @f() {
-  return
-}
--- a/test/Dialect/TCF/ops.mlir
+++ b/test/Dialect/TCF/ops.mlir
@ -1,26 +0,0 @@
-// RUN: npcomp-opt <%s | npcomp-opt | FileCheck %s --dump-input=fail
-
-// CHECK-LABEL: func @binary_elementwise
-func @binary_elementwise(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) {
-  // CHECK: tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  // CHECK: tcf.max %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  // CHECK: tcf.exp %arg0 : tensor<?xf32>
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  %1 = tcf.max %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  %2 = tcf.exp %arg0 : tensor<?xf32>
-  return
-}
-
-// CHECK-LABEL: func @matmul
-func @matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  // CHECK: tcf.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  %0 = tcf.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: func @conv_2d_nchw
-func @conv_2d_nchw(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
-  // CHECK: tcf.conv_2d_nchw %arg0, %arg1 : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
-  %0 = tcf.conv_2d_nchw %arg0, %arg1 : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
-  return %0 : tensor<?x?x?x?xf32>
-}
--- a/test/Dialect/TCP/bufferize.mlir
+++ b/test/Dialect/TCP/bufferize.mlir
@ -1,59 +0,0 @@
-// RUN: npcomp-opt -tcp-bufferize <%s | FileCheck %s
-
-// CHECK-LABEL:   func @tcp_broadcast_to(
-// CHECK-SAME:                           %[[TENSOR:.*]]: tensor<?xf32>,
-// CHECK-SAME:                           %[[SHAPE:.*]]: tensor<?xindex>) -> tensor<?x?xf32> {
-// CHECK:           refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
-// Check for two nested loops, but don't look at more detail for now.
-// TODO: This pass should not create loops. Instead it should create a
-// buffer version of tcp.broadcast_to
-// CHECK:           scf.for
-// CHECK:             scf.for
-func @tcp_broadcast_to(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
-  %0 = tcp.broadcast_to %arg0, %arg1 : (tensor<?xf32>, tensor<?xindex>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL:   func @tcp_splatted(
-// CHECK-SAME:                       %[[SPLAT_VAL:.*]]: f32,
-// CHECK-SAME:                       %[[SHAPE:.*]]: tensor<?xindex>) -> tensor<?x?xf32> {
-// CHECK:           %[[RESULT:.*]] = refback.alloc_memref %[[SHAPE]] : memref<?x?xf32>
-// CHECK:           linalg.fill(%[[SPLAT_VAL]], %[[RESULT]]) : f32, memref<?x?xf32>
-// CHECK:           %[[RESULT_TENSOR:.*]] = memref.tensor_load %[[RESULT]] : memref<?x?xf32>
-// CHECK:           return %[[RESULT_TENSOR]] : tensor<?x?xf32>
-func @tcp_splatted(%arg0: f32, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
-  %0 = tcp.splatted %arg0, %arg1 : (f32, tensor<?xindex>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL:   func @tcp_pad(
-// CHECK-SAME:                  %[[TENSOR:[a-zA-Z0-9]+]]: tensor<?xf32>,
-// CHECK-SAME:                  %[[LOWER_EXPANSION:[a-zA-Z0-9]+]]: tensor<?xindex>,
-// CHECK-SAME:                  %[[UPPER_EXPANSION:[a-zA-Z0-9]+]]: tensor<?xindex>,
-// CHECK-SAME:                  %[[FILL_VAL:[a-zA-Z0-9]+]]: f32) -> tensor<?xf32> {
-// CHECK:           %[[TENSOR_MREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<?xf32>
-// CHECK:           %[[LOWER_EXPANSION_MREF:.*]] = memref.buffer_cast %[[LOWER_EXPANSION]] : memref<?xindex>
-// CHECK:           %[[UPPER_EXPANSION_MREF:.*]] = memref.buffer_cast %[[UPPER_EXPANSION]] : memref<?xindex>
-// CHECK:           %[[C0:.*]] = constant 0 : index
-// CHECK:           %[[LOWER_EXTENT_D1:.*]] = tensor.extract %[[LOWER_EXPANSION]][%[[C0]]] : tensor<?xindex>
-// CHECK:           %[[UPPER_EXTENT_D1:.*]] = tensor.extract %[[UPPER_EXPANSION]][%[[C0]]] : tensor<?xindex>
-// CHECK:           %[[C0_0:.*]] = constant 0 : index
-// CHECK:           %[[D1:.*]] = tensor.dim %[[TENSOR]], %[[C0_0]] : tensor<?xf32>
-// CHECK:           %[[D1_EXPANSION:.*]] = addi %[[LOWER_EXTENT_D1]], %[[UPPER_EXTENT_D1]] : index
-// CHECK:           %[[D1_OUT:.*]] = addi %[[D1_EXPANSION]], %[[D1]] : index
-// CHECK:           %[[D1_OUT_TENSOR:.*]] = tensor.from_elements %[[D1_OUT]] : tensor<1xindex>
-// CHECK:           %[[D1_OUT_MREF:.*]] = refback.alloc_memref %[[D1_OUT_TENSOR]] : memref<?xf32>
-// CHECK:           %[[C1:.*]] = constant 1 : index
-// CHECK:           %[[C0_1:.*]] = constant 0 : index
-// CHECK:           %[[LOWER_EXTENT_D1_1:.*]] = tensor.extract %[[LOWER_EXPANSION]][%[[C0_1]]] : tensor<?xindex>
-// CHECK:           %[[C0_2:.*]] = constant 0 : index
-// CHECK:           %[[D1_1:.*]] = tensor.dim %[[TENSOR]], %[[C0_2]] : tensor<?xf32>
-// CHECK:           linalg.fill(%[[FILL_VAL]], %[[D1_OUT_MREF]]) : f32, memref<?xf32>
-// CHECK:           %[[SUBVIEW:.*]] = memref.subview %[[D1_OUT_MREF]][%[[LOWER_EXTENT_D1_1]]] [%[[D1_1]]] [%[[C1]]] : memref<?xf32> to memref<?xf32, #map>
-// CHECK:           linalg.copy(%0, %[[SUBVIEW]]) : memref<?xf32>, memref<?xf32, #map>
-// CHECK:           %[[RESULT_TENSOR:.*]] = memref.tensor_load %[[D1_OUT_MREF]] : memref<?xf32>
-// CHECK:           return %[[RESULT_TENSOR]] : tensor<?xf32>
-func @tcp_pad(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>, %arg2: tensor<?xindex>, %arg3: f32) -> tensor<?xf32> {
-  %0 = tcp.pad %arg0, %arg1, %arg2, %arg3 : (tensor<?xf32>, tensor<?xindex>, tensor<?xindex>, f32) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
--- a/test/Dialect/TCP/ops.mlir
+++ b/test/Dialect/TCP/ops.mlir
@ -1,22 +0,0 @@
-// RUN: npcomp-opt <%s | npcomp-opt | FileCheck %s
-
-// CHECK-LABEL: @broadcast_to
-func @broadcast_to(%arg0: tensor<?xf32>, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
-  // CHECK: tcp.broadcast_to
-  %0 = tcp.broadcast_to %arg0, %arg1 : (tensor<?xf32>, tensor<?xindex>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: @splatted
-func @splatted(%arg0: f32, %arg1: tensor<?xindex>) -> tensor<?x?xf32> {
-  // CHECK: tcp.splatted
-  %0 = tcp.splatted %arg0, %arg1 : (f32, tensor<?xindex>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: @pad
-func @pad(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?xindex>, %arg2: tensor<?xindex>, %arg3: f32) -> tensor<?x?x?x?xf32> {
-  // CHECK: tcp.pad
-  %0 = tcp.pad %arg0, %arg1, %arg2, %arg3 : (tensor<?x?x?x?xf32>, tensor<?xindex>, tensor<?xindex>, f32) -> tensor<?x?x?x?xf32>
-  return %0 : tensor<?x?x?x?xf32>
-}
--- a/test/Python/Backend/RefJIT/simple_invoke_numpy.py
+++ b/test/Python/Backend/RefJIT/simple_invoke_numpy.py
@ -1,5 +1,8 @@
 # RUN: %PYTHON %s | FileCheck %s --dump-input=fail

+# TODO: Rebase this path on linalg-on-tensors or Torch dialect.
+# XFAIL: *
+
 import numpy as np

 from npcomp.compiler.numpy.backend import refjit
--- a/test/RefBackend/e2e-basic.mlir
+++ b/test/RefBackend/e2e-basic.mlir
@ -1,11 +0,0 @@
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline | FileCheck %s --dump-input=fail
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline{optimize} | FileCheck %s --dump-input=fail
-
-// This is the simplest case, which is easy to stare at for debugging
-// purposes.
-
-// CHECK-LABEL: func @rank1
-func @rank1(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
--- a/test/RefBackend/e2e-constants.mlir
+++ b/test/RefBackend/e2e-constants.mlir
@ -1,12 +0,0 @@
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline | FileCheck %s --dump-input=fail
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline{optimize} | FileCheck %s --dump-input=fail
-
-// -----
-// CHECK-LABEL: func @global_add
-func @global_add() -> tensor<2xf32> {
-  %c34 = constant dense<[3.000000e+00, 4.000000e+00]> : tensor<2xf32>
-  %c12 = constant dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>
-  %0 = tcf.add %c34, %c12 : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32>
-  %1 = tcf.add %c12, %0 : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32>
-  return %1 : tensor<2xf32>
-}
--- a/test/RefBackend/e2e-mixed-ranks.mlir
+++ b/test/RefBackend/e2e-mixed-ranks.mlir
@ -1,20 +0,0 @@
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline | FileCheck %s --dump-input=fail
-// RUN: npcomp-opt <%s -pass-pipeline=tcf-refback-lowering-pipeline{optimize} | FileCheck %s --dump-input=fail
-
-// CHECK-LABEL: func @rank1
-func @rank1(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
-// CHECK-LABEL: func @rank2
-func @rank2(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
-// CHxCK-LABEL: func @rank1and2
-func @rank1and2(%arg0: tensor<?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
--- a/test/npcomp-run-mlir/basic.mlir
+++ b/test/npcomp-run-mlir/basic.mlir
@ -1,11 +1,27 @@
 // RUN: npcomp-run-mlir %s \
-// RUN:   -invoke basic \
-// RUN:   -arg-value="dense<[1.0]> : tensor<1xf32>" \
+// RUN:   -invoke forward \
+// RUN:   -arg-value="dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32>" \
+// RUN:   -arg-value="dense<[10.0, 20.0]> : tensor<2xf32>" \
 // RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
 // RUN:   | FileCheck %s

-// CHECK: output #0: dense<2.000000e+00> : tensor<1xf32>
-func @basic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 =  tcf.add %arg0, %arg0 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
+// CHECK{LITERAL}: output #0: dense<[[1.100000e+01, 2.200000e+01], [1.300000e+01, 2.400000e+01]]> : tensor<2x2xf32>
+#map0 = affine_map<(d0, d1) -> (d0, d1)>
+#map1 = affine_map<(d0, d1) -> (d1)>
+
+builtin.func @forward(%arg0: tensor<?x?xf32>, %arg1: tensor<?xf32>) -> tensor<?x?xf32> {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %rows = tensor.dim %arg0, %c0 : tensor<?x?xf32>
+  %cols = tensor.dim %arg0, %c1 : tensor<?x?xf32>
+  %vec_len = tensor.dim %arg1, %c0 : tensor<?xf32>
+  %same_len = cmpi eq, %cols, %vec_len : index  // %arg1 must span dim 1 of %arg0
+  assert %same_len, "mismatched size for broadcast"
+  %init = linalg.init_tensor [%rows, %cols] : tensor<?x?xf32>
+  %sum = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?xf32>) outs(%init : tensor<?x?xf32>) {
+  ^bb0(%lhs: f32, %rhs: f32, %out: f32):  // #map1 reads %arg1 by d1 only; %out is unused
+    %elem = addf %lhs, %rhs : f32
+    linalg.yield %elem : f32
+  } -> tensor<?x?xf32>
+  return %sum : tensor<?x?xf32>
+}
--- a/test/npcomp-run-mlir/broadcast.mlir
+++ b/test/npcomp-run-mlir/broadcast.mlir
@ -1,17 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke broadcast \
-// RUN:   -arg-value="dense<[[1.0], [10.0]]> : tensor<2x1xf32>" \
-// RUN:   -arg-value="dense<[[3.0, 4.0]]> : tensor<1x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-//   2x1     1x2       2x2
-//  [ 1] + [3, 4] == [ 4,  5]
-//  [10]          == [13, 14]
-
-// CHECK: output #0: dense<[
-// CHECK-SAME: [4.000000e+00, 5.000000e+00], [1.300000e+01, 1.400000e+01]]> : tensor<2x2xf32>
-func @broadcast(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
--- a/test/npcomp-run-mlir/constant-add-scalar.mlir
+++ b/test/npcomp-run-mlir/constant-add-scalar.mlir
@ -1,12 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke constant_add_scalar \
-// RUN:   -arg-value="dense<3.0> : tensor<f32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: output #0: dense<4.000000e+00> : tensor<f32>
-func @constant_add_scalar(%arg0: tensor<f32>) -> tensor<f32> {
-  %0 = constant dense<1.0> : tensor<f32>
-  %1 = tcf.add %arg0, %0 : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  return %1 : tensor<f32>
-}
--- a/test/npcomp-run-mlir/constant-add.mlir
+++ b/test/npcomp-run-mlir/constant-add.mlir
@ -1,12 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke constant_add \
-// RUN:   -arg-value="dense<[3.0, 5.0]> : tensor<2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: output #0: dense<[4.000000e+00, 7.000000e+00]> : tensor<2xf32>
-func @constant_add(%arg0: tensor<2xf32>) -> tensor<2xf32> {
-  %0 = constant dense<[1.0, 2.0]> : tensor<2xf32>
-  %1 = tcf.add %arg0, %0 : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32>
-  return %1 : tensor<2xf32>
-}
--- a/test/npcomp-run-mlir/control-flow-basic.mlir
+++ b/test/npcomp-run-mlir/control-flow-basic.mlir
@ -1,28 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke pow2 \
-// RUN:   -arg-value="dense<8.0> : tensor<f32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// 2^8 == 256
-// CHECK: output #0: dense<2.560000e+02> : tensor<f32>
-func @pow2(%arg0: tensor<f32>) -> tensor<f32> {
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-
-  // Slight awkwardness: convert the tensor<f32> to an index.
-  // TODO: Allow passing plain integers/floats (not tensors) at
-  // calling convention boundaries.
-
-  %num_iters_float = tensor.extract %arg0[] : tensor<f32>
-  %num_iters_i32 = fptosi %num_iters_float : f32 to i32
-  %num_iters = index_cast %num_iters_i32 : i32 to index
-
-  // Repeatedly add the value to itself %num_iters times.
-  %tensor_c1 = constant dense<1.0> : tensor<f32>
-  %ret = scf.for %iv = %c0 to %num_iters step %c1 iter_args(%iter = %tensor_c1) -> tensor<f32> {
-    %doubled = tcf.add %iter, %iter : (tensor<f32>, tensor<f32>) -> tensor<f32>
-    scf.yield %doubled : tensor<f32>
-  }
-  return %ret : tensor<f32>
-}
--- a/test/npcomp-run-mlir/conv_2d_nchw.mlir
+++ b/test/npcomp-run-mlir/conv_2d_nchw.mlir
@ -1,67 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<2x1x1x1xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x1x1xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=BATCH
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x2x1x1xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<2x2x1x1xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=SAME_CHANNELS
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x2x1x1xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x2x1x1xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=DIFFERENT_CHANNELS
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x2xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x1x1xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=TINY_SQUARE
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x32x32xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x32x32xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=HUGE_SQUARE
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x2xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x0x0xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=ZERO_KH_KW
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x0x0xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x0x0xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=ZERO_H_W
-
-// BATCH: output #0: dense<0.000000e+00> : tensor<2x1x1x1xf32>
-
-// SAME_CHANNELS: output #0: dense<0.000000e+00> : tensor<1x2x1x1xf32>
-
-// DIFFERENT_CHANNELS: output #0: dense<0.000000e+00> : tensor<1x1x1x1xf32>
-
-// TINY_SQUARE: output #0: dense<0.000000e+00> : tensor<1x1x2x2xf32>
-
-// HUGE_SQUARE: output #0: dense<0.000000e+00> : tensor<1x1x1x1xf32>
-
-// ZERO_KH_KW: output #0: dense<0.000000e+00> : tensor<1x1x3x3xf32>
-
-// ZERO_H_W: output #0: dense<0.000000e+00> : tensor<1x1x1x1xf32>
-
-func @conv_2d_nchw(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
-  %0 = tcf.conv_2d_nchw %arg0, %arg1 : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
-  return %0 : tensor<?x?x?x?xf32>
-}
--- a/test/npcomp-run-mlir/elementwise.mlir
+++ b/test/npcomp-run-mlir/elementwise.mlir
@ -1,52 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke max \
-// RUN:   -arg-value="dense<[1.0]> : tensor<1xf32>" \
-// RUN:   -arg-value="dense<[3.0]> : tensor<1xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=MAX
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke mul \
-// RUN:   -arg-value="dense<[1.0, 2.0]> : tensor<2xf32>" \
-// RUN:   -arg-value="dense<[3.0, 4.0]> : tensor<2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=MUL
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke exp \
-// RUN:   -arg-value="dense<[0.0, 1.0]> : tensor<2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=EXP
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke tanh \
-// RUN:   -arg-value="dense<[0.0, 1.0]> : tensor<2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=TANH
-
-// These ops share a lot of code paths. So we don't test the exact
-// broadcasting behavior and error checking for all of them.
-
-// MAX: output #0: dense<3.000000e+00> : tensor<1xf32>
-func @max(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 =  tcf.max %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
-// MUL: output #0: dense<[3.000000e+00, 8.000000e+00]> : tensor<2xf32>
-func @mul(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 =  tcf.mul %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
-// EXP: output #0: dense<[1.000000e+00, 2.71828175]> : tensor<2xf32>
-func @exp(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.exp %arg0 : tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
-// TANH: output #0: dense<[0.000000e+00, 0.761594116]> : tensor<2xf32>
-func @tanh(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.tanh %arg0 : tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
--- a/test/npcomp-run-mlir/invalid-broadcast.mlir
+++ b/test/npcomp-run-mlir/invalid-broadcast.mlir
@ -1,12 +0,0 @@
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke invalid_broadcast \
-// RUN:   -arg-value="dense<[1.0, 2.0]> : tensor<2xf32>" \
-// RUN:   -arg-value="dense<[3.0, 4.0, 5.0]> : tensor<3xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: NPCOMP: aborting: required broadcastable shapes
-func @invalid_broadcast(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
--- a/test/npcomp-run-mlir/invalid-conv_2d_nchw.mlir
+++ b/test/npcomp-run-mlir/invalid-conv_2d_nchw.mlir
@ -1,28 +0,0 @@
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x2xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x2x2x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=CHANNELS
-
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x2xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x3x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=HEIGHT
-
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke conv_2d_nchw \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x2xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<1x1x2x3xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=WIDTH
-
-// CHANNELS: NPCOMP: aborting: input and filter in-channels must be equal
-// HEIGHT: NPCOMP: aborting: input height must be greater than or equal to filter KH-dimension
-// WIDTH: NPCOMP: aborting: input width must be greater than or equal to filter KW-dimension
-func @conv_2d_nchw(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
-  %0 = tcf.conv_2d_nchw %arg0, %arg1 : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
-  return %0 : tensor<?x?x?x?xf32>
-}
--- a/test/npcomp-run-mlir/invalid-input-shapes.mlir
+++ b/test/npcomp-run-mlir/invalid-input-shapes.mlir
@ -1,26 +0,0 @@
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke invalid_input_shape \
-// RUN:   -arg-value="dense<1.0> : tensor<2x2x2x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s -check-prefix=ARG0-INVALID
-
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke invalid_input_shape_arg1 \
-// RUN:   -arg-value="dense<1.0> : tensor<1x2x5xf32>" \
-// RUN:   -arg-value="dense<1.0> : tensor<1x2x10xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s -check-prefix=ARG1-INVALID
-
-// ARG0-INVALID: invoking 'invalid_input_shape': input shape mismatch (%arg0).
-// ARG0-INVALID-SAME: actual (provided by user): (2x2x2x2)
-// ARG0-INVALID-SAME: expected (from compiler): (1x2x3x4)
-func @invalid_input_shape(%arg0: tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> {
-  return %arg0: tensor<1x2x3x4xf32>
-}
-
-// ARG1-INVALID: invoking 'invalid_input_shape_arg1': input shape mismatch (%arg1)
-// ARG1-INVALID-SAME: actual (provided by user): (1x2x10)
-// ARG1-INVALID-SAME: expected (from compiler): (1x4x?)
-func @invalid_input_shape_arg1(%arg0: tensor<1x2x?xf32>, %arg1: tensor<1x4x?xf32>) {
-  return 
-}
--- a/test/npcomp-run-mlir/invalid-input-types.mlir
+++ b/test/npcomp-run-mlir/invalid-input-types.mlir
@ -8,6 +8,5 @@
 // CHECK-SAME: actual (provided by user): Float
 // CHECK-SAME: expected (from compiler): kTensor
 func @expects_one_tensor(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg0 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
+  return %arg0 : tensor<?xf32>  // identity body; the expected diagnostic above is about the argument kind, not the result
+}
--- a/test/npcomp-run-mlir/invalid-matmul.mlir
+++ b/test/npcomp-run-mlir/invalid-matmul.mlir
@ -1,17 +0,0 @@
-// RUN: not npcomp-run-mlir %s \
-// RUN:   -invoke matmul \
-// RUN:   -arg-value="dense<[[1.0, 0.0, 1.0], [1.0, 1.0, 1.0]]> : tensor<2x3xf32>" \
-// RUN:   -arg-value="dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// Invalid: contracting dimensions don't match.
-// [1 0 1] * [1 2] = [6  8]
-// [1 1 1]   [3 4]   [9 12]
-
-// CHECK: NPCOMP: aborting: mismatching contracting dimension for matmul
-func @matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
--- a/test/npcomp-run-mlir/invalid-num-inputs.mlir
+++ b/test/npcomp-run-mlir/invalid-num-inputs.mlir
@ -5,6 +5,5 @@

 // CHECK: invoking 'requires_one_input': expected 1 inputs
 func @requires_one_input(%arg0: tensor<?xf32>) -> tensor<?xf32> {
-  %0 = tcf.add %arg0, %arg0 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
+  return %arg0 : tensor<?xf32>  // identity body; the expected diagnostic above is about the input count, not the result
+}
--- a/test/npcomp-run-mlir/matmul.mlir
+++ b/test/npcomp-run-mlir/matmul.mlir
@ -1,32 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke matmul \
-// RUN:   -arg-value="dense<[[1.0, 0.0, 1.0], [1.0, 1.0, 1.0]]> : tensor<2x3xf32>" \
-// RUN:   -arg-value="dense<[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]> : tensor<3x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke matmul \
-// RUN:   -arg-value="dense<0.0> : tensor<2x3xf32>" \
-// RUN:   -arg-value="dense<0.0> : tensor<3x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=ZEROS
-
-// Basic correctness check:
-// [1 0 1] * [1 2] = [6  8]
-// [1 1 1]   [3 4]   [9 12]
-//           [5 6]
-
-// CHECK: output #0: dense<[
-// CHECK-SAME:   [6.000000e+00, 8.000000e+00], [9.000000e+00, 1.200000e+01]
-// CHECK-SAME: ]> : tensor<2x2xf32>
-
-// Check with zeros as well. The result should be identically zeros.
-// If any uninitialized data sneaks in (even very small values that would be
-// rounding errors for the test case above), it will show up here.
-// ZEROS: output #0: dense<0.000000e+00> : tensor<2x2xf32>
-func @matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.matmul %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-
--- a/test/npcomp-run-mlir/mixed-rank.mlir
+++ b/test/npcomp-run-mlir/mixed-rank.mlir
@ -1,13 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke mixed_rank \
-// RUN:   -arg-value="dense<[1.0]> : tensor<1xf32>" \
-// RUN:   -arg-value="dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: output #0: dense<[
-// CHECK-SAME: [2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00]]> : tensor<2x2xf32>
-func @mixed_rank(%arg0: tensor<?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.add %arg0, %arg1 : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
--- a/test/npcomp-run-mlir/multi-output.mlir
+++ b/test/npcomp-run-mlir/multi-output.mlir
@ -6,7 +6,15 @@

 // CHECK: output #0: dense<2.000000e+00> : tensor<1xf32>
 // CHECK: output #1: dense<2.000000e+00> : tensor<1xf32>
+#map0 = affine_map<(d0) -> (d0)>
 func @multi_output(%arg0: tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
-  %0 = tcf.add %arg0, %arg0 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
-  return %0, %0 : tensor<?xf32>, tensor<?xf32>
+  %c0 = constant 0 : index
+  %len = tensor.dim %arg0, %c0 : tensor<?xf32>
+  %init = linalg.init_tensor [%len] : tensor<?xf32>
+  %doubled = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor<?xf32>, tensor<?xf32>) outs(%init : tensor<?xf32>) {
+  ^bb0(%lhs: f32, %rhs: f32, %out: f32):  // both inputs are %arg0; %out is unused
+    %elem = addf %lhs, %rhs : f32
+    linalg.yield %elem : f32
+  } -> tensor<?xf32>
+  return %doubled, %doubled : tensor<?xf32>, tensor<?xf32>  // same value returned as both outputs
 }
--- a/test/npcomp-run-mlir/multiple-ops.mlir
+++ b/test/npcomp-run-mlir/multiple-ops.mlir
@ -1,15 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke multiple_ops \
-// RUN:   -arg-value="dense<1.0> : tensor<f32>" \
-// RUN:   -arg-value="dense<[1.0]> : tensor<1xf32>" \
-// RUN:   -arg-value="dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: output #0: dense<[
-// CHECK-SAME: [3.000000e+00, 4.000000e+00], [5.000000e+00, 6.000000e+00]]> : tensor<2x2xf32>
-func @multiple_ops(%arg0: tensor<f32>, %arg1: tensor<?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  %0 = tcf.add %arg1, %arg2 : (tensor<?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  %1 = tcf.add %arg0, %0  : (tensor<f32>, tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %1 : tensor<?x?xf32>
-}
--- a/test/npcomp-run-mlir/pad.mlir
+++ b/test/npcomp-run-mlir/pad.mlir
@ -1,18 +0,0 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke pad \
-// RUN:   -arg-value="dense<[1.2, 3.4]> : tensor<2xf32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s
-
-// CHECK: output #0: dense<
-// CHECK-SAME:   [0.000000e+00, 1.200000e+00, 3.400000e+00, 0.000000e+00, 0.000000e+00]
-// CHECK-SAME: > : tensor<5xf32>
-
-func @pad(%arg0: tensor<?xf32> ) -> tensor<?xf32> {
-  %lowerExpansion = shape.const_shape [1] : tensor<?xindex>
-  %upperExpansion = shape.const_shape [2] : tensor<?xindex>
-  %fillVal = constant 0.0 : f32
-  %0 = tcp.pad %arg0, %lowerExpansion, %upperExpansion, %fillVal : (tensor<?xf32>, tensor<?xindex>, tensor<?xindex>, f32) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
-
--- a/test/npcomp-run-mlir/scalar.mlir
+++ b/test/npcomp-run-mlir/scalar.mlir
@ -1,22 +1,10 @@
-// RUN: npcomp-run-mlir %s \
-// RUN:   -invoke scalar \
-// RUN:   -arg-value="dense<1.0> : tensor<f32>" \
-// RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=SCALAR
-
 // RUN: npcomp-run-mlir %s \
 // RUN:   -invoke scalar_arg \
 // RUN:   -arg-value="2.5 : f32" \
 // RUN:   -shared-libs=%npcomp_runtime_shlib 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=SCALAR_ARG
+// RUN:   | FileCheck %s

-// SCALAR: output #0: dense<2.000000e+00> : tensor<f32>
-func @scalar(%arg0: tensor<f32>) -> tensor<f32> {
-  %0 = tcf.add %arg0, %arg0 : (tensor<f32>, tensor<f32>) -> tensor<f32>
-  return %0 : tensor<f32>
-}
-
-// SCALAR_ARG: output #0: 2.500000e+00 : f32
+// CHECK: output #0: 2.500000e+00 : f32
 func @scalar_arg(%arg0: f32) -> f32 {
  return %arg0 : f32
-}
+}