[Torch Dialect] Support Einsum Op (#2230)

As title, support torch.aten.einsum op Right now only support Static Shape, because of the known issue, the fixed solution is here: https://github.com/llvm/torch-mlir/pull/2154 Co-authored-by: Jiawei Wu [wujiawei.aml@bytedance.com](mailto:wujiawei.aml@bytedance.com)
2023-12-09 20:30:37 -08:00 · 2023-12-09 20:30:37 -08:00 · 96fcde4d77
parent 07c3e11f56
commit 96fcde4d77
8 changed files with 554 additions and 0 deletions
--- a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
+++ b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@ -8447,6 +8447,31 @@ def Torch_AtenOneHotOp : Torch_Op<"aten.one_hot", [
  }];
 }
 def Torch_AtenEinsumOp : Torch_Op<"aten.einsum", [
    AllowsTypeRefinement,
    HasValueSemantics,
    ReadOnly
  ]> {
  let summary = "Generated op for `aten::einsum : (str, Tensor[], int[]?) -> (Tensor)`";
  let arguments = (ins
    Torch_StringType:$equation,
    AnyTorchListOfTensorType:$tensors,
    AnyTorchOptionalListOfTorchIntType:$path
  );
  let results = (outs
    AnyTorchTensorType:$result
  );
  let hasCustomAssemblyFormat = 1;
  let extraClassDefinition = [{
    ParseResult AtenEinsumOp::parse(OpAsmParser &parser, OperationState &result) {
      return parseDefaultTorchOp(parser, result, 3, 1);
    }
    void AtenEinsumOp::print(OpAsmPrinter &printer) {
      printDefaultTorchOp(printer, *this, 3, 1);
    }
  }];
 }
 def Torch_AtenBucketizeTensorOp : Torch_Op<"aten.bucketize.Tensor", [
    AllowsTypeRefinement,
    HasValueSemantics,
--- a/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
+++ b/lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
@ -11321,6 +11321,33 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() {
 "    %5 = call @__torch__.torch_mlir.jit_ir_importer.build_tools.library_generator.promote_dtypes(%0, %1) : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
 "    return %5 : !torch.int\n"
 "  }\n"
 "  func.func @\"__torch_mlir_dtype_fn.aten.einsum\"(%arg0: !torch.str, %arg1: !torch.list<tuple<int, int>>, %arg2: !torch.optional<list<int>>) -> !torch.int {\n"
 "    %true = torch.constant.bool true\n"
 "    %none = torch.constant.none\n"
 "    %str = torch.constant.str \"AssertionError: \"\n"
 "    %int0 = torch.constant.int 0\n"
 "    %0 = torch.prim.ListConstruct  : () -> !torch.list<optional<int>>\n"
 "    %1 = torch.prim.ListConstruct  : () -> !torch.list<int>\n"
 "    %2 = torch.aten.len.t %arg1 : !torch.list<tuple<int, int>> -> !torch.int\n"
 "    %3 = torch.aten.ne.int %2, %int0 : !torch.int, !torch.int -> !torch.bool\n"
 "    torch.prim.If %3 -> () {\n"
 "      torch.prim.If.yield\n"
 "    } else {\n"
 "      torch.prim.RaiseException %str, %none : !torch.str, !torch.none\n"
 "      torch.prim.If.yield\n"
 "    }\n"
 "    %4 = torch.aten.len.t %arg1 : !torch.list<tuple<int, int>> -> !torch.int\n"
 "    torch.prim.Loop %4, %true, init() {\n"
 "    ^bb0(%arg3: !torch.int):\n"
 "      %6 = torch.aten.__getitem__.t %arg1, %arg3 : !torch.list<tuple<int, int>>, !torch.int -> !torch.tuple<int, int>\n"
 "      %7:2 = torch.prim.TupleUnpack %6 : !torch.tuple<int, int> -> !torch.int, !torch.int\n"
 "      %8 = torch.aten.append.t %0, %7#0 : !torch.list<optional<int>>, !torch.int -> !torch.list<optional<int>>\n"
 "      %9 = torch.aten.append.t %1, %7#1 : !torch.list<int>, !torch.int -> !torch.list<int>\n"
 "      torch.prim.Loop.condition %true, iter()\n"
 "    } : (!torch.int, !torch.bool) -> ()\n"
 "    %5 = call @__torch__.torch_mlir.jit_ir_importer.build_tools.library_generator.promote_dtypes(%0, %1) : (!torch.list<optional<int>>, !torch.list<int>) -> !torch.int\n"
 "    return %5 : !torch.int\n"
 "  }\n"
 "  func.func @\"__torch_mlir_dtype_fn.aten._shape_as_tensor\"(%arg0: !torch.tuple<int, int>) -> !torch.int {\n"
 "    %int4 = torch.constant.int 4\n"
 "    return %int4 : !torch.int\n"
--- a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
+++ b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
@ -187,6 +187,358 @@ static SmallVector<int64_t> computeDimsOrderForMoveDim(int64_t srcDimInt,
  return dimsOrder;
 }
 static bool parseEquation(const std::string &equation,
                          SmallVector<SmallVector<char>> &inputTokens,
                          SmallVector<char> &resultTokens) {
  SmallVector<char> inputToken;
  size_t index = 0;
  enum EquationVariable { kIsInput, kIsResult };
  EquationVariable currentVariable = kIsInput;
  while (index < equation.size()) {
    if (std::isalpha(equation[index])) {
      if (currentVariable == kIsInput) {
        inputToken.push_back(equation[index]);
      } else {
        resultTokens.push_back(equation[index]);
      }
    } else if (equation[index] == ',') {
      inputTokens.push_back(inputToken);
      inputToken.clear();
    } else if ((index < (equation.size() - 1)) &&
               (equation.substr(index, 2).find("->") != std::string::npos)) {
      inputTokens.push_back(inputToken);
      inputToken.clear();
      currentVariable = kIsResult;
      index++;
    } else {
      return false;
    }
    index++;
  }
  return true;
 }
 // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *lhsContractingDims] =>
 // [batchingDimsProd, lhsOtherDimsProd, lhsContractingDimsProd]
 static Value collapseDimForMatmul(PatternRewriter &rewriter, Location loc,
                                  Value input, int64_t batchDimsLength,
                                  int64_t contractingDimsLength,
                                  int64_t otherDimsLength,
                                  int64_t reduceDimsLength, bool isLhs) {
  auto inputType = input.getType().cast<BaseTensorType>();
  auto inputRank = batchDimsLength + contractingDimsLength + otherDimsLength +
                   reduceDimsLength;
  SmallVector<Value> inputShapeTensor;
  for (auto i = 0; i < inputRank; ++i) {
    inputShapeTensor.emplace_back(rewriter.create<AtenSizeIntOp>(
        loc, input,
        rewriter.create<Torch::ConstantIntOp>(loc,
                                              rewriter.getI64IntegerAttr(i))));
  }
  SmallVector<Value> outShapeTensor;
  Value constOne =
      rewriter.create<Torch::ConstantIntOp>(loc, rewriter.getI64IntegerAttr(1));
  auto dimOffset = 0;
  auto appendDims = [&](int64_t dimLength) {
    Value prod = constOne;
    for (auto i = 0; i < dimLength; ++i) {
      prod = rewriter.create<AtenMulIntOp>(loc, prod,
                                           inputShapeTensor[i + dimOffset]);
    }
    outShapeTensor.emplace_back(prod);
    dimOffset += dimLength;
  };
  appendDims(batchDimsLength);
  if (!isLhs)
    appendDims(contractingDimsLength);
  appendDims(otherDimsLength + reduceDimsLength);
  if (isLhs)
    appendDims(contractingDimsLength);
  auto outShapeValue = rewriter.create<Torch::PrimListConstructOp>(
      loc, Torch::ListType::get(Torch::IntType::get(input.getContext())),
      outShapeTensor);
  auto outType = inputType.getWithSizesAndDtype(std::nullopt,
                                                inputType.getOptionalDtype());
  return rewriter.create<Torch::AtenReshapeOp>(loc, outType, input,
                                               outShapeValue);
 }
 // classify every dim token into different categories. Note that although we
 // parse out reduce dims, we delay their execution until
 // `performLastPermuteAndReduce`.
 static void parseDimTokens(
    SmallVector<char> &lhsTokens, SmallVector<char> &rhsTokens,
    SmallVector<char> &finalResultTokens, SmallVector<char> &contractingDims,
    SmallVector<char> &lhsReduceDims, SmallVector<char> &rhsReduceDims,
    SmallVector<char> &batchingDims, SmallVector<char> &lhsOtherDims,
    SmallVector<char> &rhsOtherDims) {
  llvm::SmallDenseSet<char> lhsTokenSet(lhsTokens.begin(), lhsTokens.end());
  llvm::SmallDenseSet<char> rhsTokenSet(rhsTokens.begin(), rhsTokens.end());
  llvm::SmallDenseSet<char> finalResultTokenSet(finalResultTokens.begin(),
                                                finalResultTokens.end());
  for (size_t i = 0; i < lhsTokens.size(); ++i) {
    bool rhsContains = rhsTokenSet.contains(lhsTokens[i]);
    bool finalResultConatins = finalResultTokenSet.contains(lhsTokens[i]);
    // batching dim
    if (rhsContains && finalResultConatins) {
      batchingDims.push_back(lhsTokens[i]);
      // reduce dim of lhs
    } else if (!rhsContains && !finalResultConatins) {
      lhsReduceDims.push_back(lhsTokens[i]);
      // other dim of lhs
    } else if (finalResultConatins) {
      lhsOtherDims.push_back(lhsTokens[i]);
      // contracting dim of lhs
    } else if (rhsContains) {
      contractingDims.push_back(lhsTokens[i]);
    }
  }
  for (size_t i = 0; i < rhsTokens.size(); ++i) {
    bool lhsContains = lhsTokenSet.contains(rhsTokens[i]);
    bool finalResultConatins = finalResultTokenSet.contains(rhsTokens[i]);
    // batching dim
    if (lhsContains && finalResultConatins) {
      // reduce dim of rhs
    } else if (!lhsContains && !finalResultConatins) {
      rhsReduceDims.push_back(rhsTokens[i]);
      // other dim of rhs
    } else if (finalResultConatins) {
      rhsOtherDims.push_back(rhsTokens[i]);
      // contracting dim of rhs
    } else if (lhsContains) {
    }
  }
 }
 static void generateIdealReusltDimTokens(SmallVector<char> &batchingDims,
                                         SmallVector<char> &lhsOtherDims,
                                         SmallVector<char> &rhsOtherDims,
                                         SmallVector<char> &lhsReduceDims,
                                         SmallVector<char> &rhsReduceDims,
                                         SmallVector<char> &resultTokens) {
  // generate ideal result dims, i.e.,
  // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *rhsOtherDims,
  // *rhsReduceDims]
  resultTokens.insert(resultTokens.end(), batchingDims.begin(),
                      batchingDims.end());
  resultTokens.insert(resultTokens.end(), lhsOtherDims.begin(),
                      lhsOtherDims.end());
  resultTokens.insert(resultTokens.end(), lhsReduceDims.begin(),
                      lhsReduceDims.end());
  resultTokens.insert(resultTokens.end(), rhsOtherDims.begin(),
                      rhsOtherDims.end());
  resultTokens.insert(resultTokens.end(), rhsReduceDims.begin(),
                      rhsReduceDims.end());
 }
 static Value permuteTensorForMatmul(PatternRewriter &rewriter, Location loc,
                                    Value input, SmallVector<char> &dimTokens,
                                    SmallVector<char> &batchingDims,
                                    SmallVector<char> &contractingDims,
                                    SmallVector<char> &otherDims,
                                    SmallVector<char> &reduceDims, bool isLhs) {
  auto inputType = input.getType().cast<BaseTensorType>();
  llvm::SmallDenseMap<char, int64_t> dimTokenMap;
  for (size_t idx = 0; idx < dimTokens.size(); ++idx) {
    dimTokenMap[dimTokens[idx]] = idx;
  }
  SmallVector<Value> permuteVec;
  auto appendDims = [&](SmallVector<char> dimTokens) {
    for (auto d : dimTokens) {
      permuteVec.push_back(rewriter.create<Torch::ConstantIntOp>(
          loc, rewriter.getI64IntegerAttr(dimTokenMap[d])));
    }
  };
  appendDims(batchingDims);
  if (!isLhs)
    appendDims(contractingDims);
  appendDims(otherDims);
  appendDims(reduceDims);
  if (isLhs)
    appendDims(contractingDims);
  Value dstDims = rewriter.create<Torch::PrimListConstructOp>(
      loc, Torch::ListType::get(Torch::IntType::get(rewriter.getContext())),
      permuteVec);
  auto outType = inputType.getWithSizesAndDtype(std::nullopt,
                                                inputType.getOptionalDtype());
  return rewriter.create<Torch::AtenPermuteOp>(loc, outType, input, dstDims);
 }
 static LogicalResult performMatmul(PatternRewriter &rewriter, Location loc,
                                   Value lhs, SmallVector<char> &lhsTokens,
                                   Value rhs, SmallVector<char> &rhsTokens,
                                   Value &result,
                                   SmallVector<char> &resultTokens,
                                   SmallVector<char> &finalResultTokens) {
  auto lhsType = lhs.getType().cast<BaseTensorType>();
  auto rhsType = rhs.getType().cast<BaseTensorType>();
  Type outputDType = lhsType.hasDtype() ? lhsType.getOptionalDtype()
                                      : rhsType.getOptionalDtype();
  llvm::SmallDenseMap<char, Value> lhsDimShapeMap;
  for (size_t idx = 0; idx < lhsTokens.size(); ++idx) {
    char d = lhsTokens[idx];
    lhsDimShapeMap[d] = rewriter.create<AtenSizeIntOp>(
        loc, lhs,
        rewriter.create<Torch::ConstantIntOp>(loc,
                                              rewriter.getI64IntegerAttr(idx)));
  }
  llvm::SmallDenseMap<char, Value> rhsDimShapeMap;
  for (size_t idx = 0; idx < rhsTokens.size(); ++idx) {
    char d = rhsTokens[idx];
    rhsDimShapeMap[d] = rewriter.create<AtenSizeIntOp>(
        loc, rhs,
        rewriter.create<Torch::ConstantIntOp>(loc,
                                              rewriter.getI64IntegerAttr(idx)));
  }
  // parse batch, contracting, other, reduce dims of lhs and rhs
  SmallVector<char> contractingDims;
  SmallVector<char> lhsReduceDims;
  SmallVector<char> rhsReduceDims;
  SmallVector<char> lhsOtherDims;
  SmallVector<char> rhsOtherDims;
  SmallVector<char> batchingDims;
  parseDimTokens(lhsTokens, rhsTokens, finalResultTokens, contractingDims,
                 lhsReduceDims, rhsReduceDims, batchingDims, lhsOtherDims,
                 rhsOtherDims);
  llvm::SmallDenseMap<char, Value> outDimShapeMap;
  auto generateOutDimShapeMap = [&](SmallVector<char> &dims) {
    for (auto d : dims) {
      bool lhsContains = lhsDimShapeMap.count(d) > 0;
      bool rhsContains = rhsDimShapeMap.count(d) > 0;
      if (lhsContains && rhsContains) {
        outDimShapeMap[d] = rewriter.create<Torch::PrimMaxIntOp>(
            loc, lhsDimShapeMap[d], rhsDimShapeMap[d]);
      } else if (lhsContains) {
        outDimShapeMap[d] = lhsDimShapeMap[d];
      } else if (rhsContains) {
        outDimShapeMap[d] = rhsDimShapeMap[d];
      }
    }
  };
  generateOutDimShapeMap(contractingDims);
  generateOutDimShapeMap(batchingDims);
  generateOutDimShapeMap(lhsReduceDims);
  generateOutDimShapeMap(rhsReduceDims);
  generateOutDimShapeMap(lhsOtherDims);
  generateOutDimShapeMap(rhsOtherDims);
  if (contractingDims.size() == 0 && lhsOtherDims.size() == 0 &&
      rhsOtherDims.size() == 0) {
    return rewriter.notifyMatchFailure(
        loc, "Hadamard product is currently not supported");
  }
  // shape: [*batchingDims, *lhsOtherDims, *lhsReduceDims, *lhsContractingDims]
  lhs = permuteTensorForMatmul(rewriter, loc, lhs, lhsTokens, batchingDims,
                               contractingDims, lhsOtherDims, lhsReduceDims,
                               true);
  // shape: [*batchingDims, *rhsContractingDims, *rhsOtherDims, *rhsReduceDims]
  rhs = permuteTensorForMatmul(rewriter, loc, rhs, rhsTokens, batchingDims,
                               contractingDims, rhsOtherDims, rhsReduceDims,
                               false);
  // shape: [batchingDimsProd, lhsOtherDimsProd, lhsContractingDimsProd]
  lhs = collapseDimForMatmul(rewriter, loc, lhs, batchingDims.size(),
                             contractingDims.size(), lhsOtherDims.size(),
                             lhsReduceDims.size(), true);
  // shape: [batchingDimsProd, rhsContractingDimsProd, rhsOtherDimsProd]
  rhs = collapseDimForMatmul(rewriter, loc, rhs, batchingDims.size(),
                             contractingDims.size(), rhsOtherDims.size(),
                             rhsReduceDims.size(), false);
  // perform matmul
  auto outType = lhsType.getWithSizesAndDtype(std::nullopt, outputDType);
  result = rewriter.create<Torch::AtenMatmulOp>(loc, outType, lhs, rhs);
  // generate ideal result dims.
  generateIdealReusltDimTokens(batchingDims, lhsOtherDims, rhsOtherDims,
                               lhsReduceDims, rhsReduceDims, resultTokens);
  // reshape matmul result to ideal shape:
  // [batchingDimsProd, lhsOtherDimsProd, rhsOtherDimsProd] =>
  // [*batchingDims, *lhsOtherDims, *lhsReduceDims, *rhsOtherDims,
  // *rhsReduceDims]
  SmallVector<Value> outShapeTensors;
  for (char d : resultTokens) {
    outShapeTensors.emplace_back(outDimShapeMap[d]);
  }
  auto outResultShape = rewriter.create<Torch::PrimListConstructOp>(
      loc, Torch::ListType::get(Torch::IntType::get(lhs.getContext())),
      outShapeTensors);
  result = rewriter.create<Torch::AtenReshapeOp>(
      loc, lhsType.getWithSizesAndDtype(std::nullopt, outputDType), result,
      outResultShape);
  return success();
 }
 static Value performLastReduceAndPermute(PatternRewriter &rewriter,
                                         Location loc, Type outType,
                                         Value input,
                                         SmallVector<char> &inputTokens,
                                         SmallVector<char> &outTokens) {
  auto inputType = input.getType().cast<BaseTensorType>();
  llvm::SmallDenseSet<char> outTokenSet(outTokens.begin(), outTokens.end());
  SmallVector<int64_t> sumDims;
  llvm::SmallDenseMap<char, int64_t> inputDimToIdx;
  int64_t idx = 0;
  for (size_t i = 0; i < inputTokens.size(); ++i) {
    char d = inputTokens[i];
    if (!outTokenSet.contains(d)) {
      sumDims.emplace_back(i);
    } else {
      inputDimToIdx[d] = idx++;
    }
  }
  if (sumDims.size() > 0) {
    SmallVector<Value> sumDimsTensor;
    for (auto d : sumDims) {
      sumDimsTensor.emplace_back(rewriter.create<Torch::ConstantIntOp>(
          loc, rewriter.getI64IntegerAttr(d)));
    }
    auto sumDimsListValue = rewriter.create<Torch::PrimListConstructOp>(
        loc, Torch::ListType::get(Torch::IntType::get(rewriter.getContext())),
        sumDimsTensor);
    auto falseValue = rewriter.create<Torch::ConstantBoolOp>(
        loc, rewriter.getBoolAttr(false));
    auto noneValue = rewriter.create<Torch::ConstantNoneOp>(loc);
    input = rewriter.create<Torch::AtenSumDimIntListOp>(
        loc,
        inputType.getWithSizesAndDtype(std::nullopt,
                                       inputType.getOptionalDtype()),
        input, sumDimsListValue, falseValue, noneValue);
  }
  SmallVector<Value> permuteDimsTensor;
  for (auto d : outTokens) {
    permuteDimsTensor.emplace_back(rewriter.create<Torch::ConstantIntOp>(
        loc, rewriter.getI64IntegerAttr(inputDimToIdx[d])));
  }
  auto permuteDimsListValue = rewriter.create<Torch::PrimListConstructOp>(
      loc, Torch::ListType::get(Torch::IntType::get(input.getContext())),
      permuteDimsTensor);
  auto out = rewriter.create<Torch::AtenPermuteOp>(loc, outType, input,
                                                   permuteDimsListValue);
  return out;
 }
 namespace {
 /// We decompose aten.amax into a set of aten.max.dim op(s) depending on the
 /// number of dimensions across which the max needs to be computed.
@ -628,6 +980,78 @@ public:
 };
 } // namespace
 namespace {
 // Decompose AtenEinsumOp to AtenMatmulOp, and supports possible reduce
 // operation and permute operation. Currently, this pass doesn't support
 // Hadamard product. The basic idea is that:
 //  Step 1: split the string equation to input/result tokens and find
 //    batchingDims, contractingDims, otherDims and reduceDims.
 //  Step 2: permute and reshape input tensors suitable
 //    for matmul operations.
 //  Step 3: use AtenMatmulOp to get the result.
 //  Step 4: iteratively execute step 2 & 3 until we get the final result.
 //  Step 5: perform remaining permute and reduce operations.
 // notice: support static shape only
 class DecomposeAtenEinsumOp : public OpRewritePattern<AtenEinsumOp> {
 public:
  using OpRewritePattern::OpRewritePattern;
  LogicalResult matchAndRewrite(AtenEinsumOp op,
                                PatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    std::string equation;
    if (!matchPattern(op.getEquation(), m_TorchConstantStr(equation))) {
      return rewriter.notifyMatchFailure(op, "Unsupported value of equation");
    }
    SmallVector<char> resultTokens;
    SmallVector<SmallVector<char>> inputTokens;
    if (!parseEquation(equation, inputTokens, resultTokens)) {
      return rewriter.notifyMatchFailure(
          op, "Unexpected character in equations encountered");
    }
    SmallVector<Value> inputTensors;
    if (!getListConstructElements(op.getTensors(), inputTensors)) {
      return rewriter.notifyMatchFailure(
          op, "input should comes from a PrimListConstructOp");
    }
    auto allTensorHasSizes = [](Value tensor) {
      auto type = tensor.getType().dyn_cast<BaseTensorType>();
      if (!type || !type.hasSizes())
        return false;
      return true;
    };
    if (!llvm::all_of(inputTensors, allTensorHasSizes)) {
      return rewriter.notifyMatchFailure(op,
                                         "all input tensors should have sizes");
    }
    SmallVector<char> lhsTokens = inputTokens[0];
    Value lhs = inputTensors[0];
    Value result;
    for (size_t i = 1; i < inputTensors.size(); ++i) {
      auto rhs = inputTensors[i];
      auto rhsTokens = inputTokens[i];
      SmallVector<char> outTokens;
      if (failed(performMatmul(rewriter, loc, lhs, lhsTokens, rhs, rhsTokens,
                               result, outTokens, resultTokens))) {
        return failure();
      }
      lhs = result;
      lhsTokens = outTokens;
    }
    result = performLastReduceAndPermute(rewriter, loc, op.getType(), lhs,
                                         lhsTokens, resultTokens);
    rewriter.replaceOp(op, result);
    return success();
  }
 };
 } // namespace
 // Calculates the softmax function on the given `input` tensor. Softmax(x) =
 // exp(x)/sum(exp(x)).
 // To avoid overflow we use the following decomposition rule:
@ -5798,6 +6222,7 @@ public:
    addPatternIfTargetOpIsIllegal<DecomposeAtenRandLikeOp>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenHardsigmoidOp>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenRelu6Op>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenEinsumOp>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenHardswishOp>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenSoftplusOp>(patterns);
    addPatternIfTargetOpIsIllegal<DecomposeAtenSiluOp>(patterns);
--- a/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp
+++ b/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp
@ -385,6 +385,7 @@ static void markDecomposedOpsAsIllegal(MLIRContext *context,
  target.addIllegalOp<AtenReshapeOp>();
  target.addIllegalOp<Aten_SoftmaxBackwardDataOp>();
  target.addIllegalOp<AtenTanhBackwardOp>();
  target.addIllegalOp<AtenEinsumOp>();
  target.addIllegalOp<AtenAddmmOp>();
  target.addIllegalOp<AtenMeanOp>();
  target.addIllegalOp<AtenMeanDimOp>();
--- a/projects/pt1/e2e_testing/xfail_sets.py
+++ b/projects/pt1/e2e_testing/xfail_sets.py
@ -554,6 +554,9 @@ STABLEHLO_PASS_SET = {
    "EmptyLikeModule_int",
    "ExpandAsIntModule_basic",
    "ExpandModule_basic",
    "EinsumStaticModule_basic",
    "EinsumStaticFourDimensionModule_basic",
    "EinsumStaticContractRhsModule_basic",
    "Fill_TensorFloat64WithFloat32_basic",
    "Fill_TensorFloat64WithFloat64_basic",
    "Fill_TensorFloat64WithInt64_basic",
@ -1020,6 +1023,9 @@ TOSA_PASS_SET = {
    "RsubFloatModule_basic",
    "RsubFloatModule_noalpha_basic",
    "RsubInt0d_NumToTensor_Module_basic",
    "EinsumStaticModule_basic",
    "EinsumStaticFourDimensionModule_basic",
    "EinsumStaticContractRhsModule_basic",
    "ElementwiseBitwiseAndModule_basic",
    "ElementwiseBitwiseAndStaticShapeModule_basic",
    "ElementwiseBitwiseNotInt32Module_basic",
--- a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py
+++ b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py
@ -3684,6 +3684,19 @@ def aten〇cat〡dtype(tensors_rank_dtype: List[Tuple[int, int]], dim: int = 0)
        dtypes.append(tensor_dtype)
    return promote_dtypes(ranks, dtypes)
@check_dtype_function(
    [Invocation("i,j->ij", [TensorOfShape(1, dtype=torch.float32), 
                            TensorOfShape(1, dtype=torch.int32)]),])
 def aten〇einsum〡dtype(equation: str, tensors_rank_dtype: List[Tuple[int, int]], path: Optional[List[int]] = None) -> int:
    ranks: List[Optional[int]] = []
    dtypes: List[int] = []
    assert len(tensors_rank_dtype) != 0
    for tensor_rank_dtype in tensors_rank_dtype:
        tensor_rank, tensor_dtype = tensor_rank_dtype
        ranks.append(tensor_rank)
        dtypes.append(tensor_dtype)
    return promote_dtypes(ranks, dtypes)
@check_dtype_function(_check_tensors_with_the_same_dtype(num_of_tensors=1))
 def aten〇_shape_as_tensor〡dtype(self_rank_dtype: Tuple[int, int]) -> int:
    return torch.int64
--- a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py
+++ b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py
@ -566,6 +566,7 @@ def emit_ops(emitter_td: TextEmitter, registry: Registry):
    emit("aten::argmax : (Tensor, int?, bool) -> (Tensor)")
    emit("aten::argmin : (Tensor, int?, bool) -> (Tensor)")
    emit("aten::one_hot : (Tensor, int) -> (Tensor)")
    emit("aten::einsum : (str, Tensor[], int[]?) -> (Tensor)")
    emit("aten::bucketize.Tensor : (Tensor, Tensor, bool, bool) -> (Tensor)")
    emit("aten::clone : (Tensor, int?) -> (Tensor)")
    emit("aten::lift_fresh_copy : (Tensor) -> (Tensor)")
--- a/projects/pt1/python/torch_mlir_e2e_test/test_suite/reshape_like.py
+++ b/projects/pt1/python/torch_mlir_e2e_test/test_suite/reshape_like.py
@ -1044,3 +1044,59 @@ class UnflattenIntNegativeOneSizeStaticModule(torch.nn.Module):
@register_test_case(module_factory=lambda: UnflattenIntNegativeOneSizeStaticModule())
 def UnflattenIntNegativeOneSizeStaticModule_basic(module, tu: TestUtils):
    module.forward(tu.rand(5, 12, 3))
 # ==============================================================================
 class EinsumStaticModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
    @export
    @annotate_args([
        None,
        ([3, 2, 4], torch.float32, True),
        ([5, 4, 6], torch.float32, True),
        ([3, 7, 6], torch.float32, True),
    ])
    def forward(self, tensor1, tensor2, tensor3):
        return torch.ops.aten.einsum('bqe,ked,btd->bqtk', [tensor1, tensor2, tensor3])
@register_test_case(module_factory=lambda: EinsumStaticModule())
 def EinsumStaticModule_basic(module, tu: TestUtils):
    module.forward(tu.rand(3, 2, 4), tu.rand(5, 4, 6), tu.rand(3, 7, 6))
 class EinsumStaticFourDimensionModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
    @export
    @annotate_args([
        None,
        ([3, 4, 5, 6], torch.float32, True),
        ([3, 7, 5, 6], torch.float32, True),
    ])
    def forward(self, tensor1, tensor2):
        return torch.ops.aten.einsum('blhd,bshd->blhs', [tensor1, tensor2])
@register_test_case(module_factory=lambda: EinsumStaticFourDimensionModule())
 def EinsumStaticFourDimensionModule_basic(module, tu: TestUtils):
    module.forward(tu.rand(3, 4, 5, 6), tu.rand(3, 7, 5, 6))
 class EinsumStaticContractRhsModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
    @export
    @annotate_args([
        None,
        ([3, 4, 5], torch.float32, True),
        ([4, 5], torch.float32, True),
    ])
    def forward(self, tensor1, tensor2):
        return torch.ops.aten.einsum('abc,bc->a', [tensor1, tensor2])
@register_test_case(module_factory=lambda: EinsumStaticContractRhsModule())
 def EinsumStaticContractRhsModule_basic(module, tu: TestUtils):
    module.forward(tu.rand(3, 4, 5), tu.rand(4, 5))