torch-mlir/include/npcomp/Dialect/ATen/ATenOpStatisticsUtils.h

//===- ATenOpStatisticsUtils.h ----------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef NPCOMP_DIALECT_ATEN_OPSTATISTICSUTILS_H
#define NPCOMP_DIALECT_ATEN_OPSTATISTICSUTILS_H

#include "npcomp/Dialect/ATen/ATenDialect.h"

#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/Types.h"
#include "llvm/Support/Debug.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <map>
#include <string>

// Debug category used by the LLVM_DEBUG statements in this header.
// NOTE(review): DEBUG_TYPE is defined here without a matching #undef, so it
// is also visible to every file that includes this header — confirm that no
// includer defines its own DEBUG_TYPE.
#define DEBUG_TYPE "aten-op-stats"

/// This file contains utility functions that factor out code shared by
/// operations that implement StatisticsOpInterface.

namespace mlir {
namespace NPCOMP {
namespace aten {

/// Return the op statistics for conv2d-like operations.
///
/// The op must expose `input()`, `weight()`, `bias()` and a single
/// `getResult()`; each value's type is cast to TensorType.  Dimension 1 of
/// the input is read as the input feature-map depth, and dimensions 2 and 3
/// of the weight as the kernel height and width.
///
/// \param o       The convolution-like operation to analyse.
/// \param groups  Number of convolution groups.  A value of 0 is treated as 1
///                so that the per-output MAC count never divides by zero.
/// \returns A map from statistic name to count.  Keys: "ops:MAC", "ops:+",
///          "reads", "writes", "operand:0:activation_in",
///          "operand:1:parameters_in:weight", "operand:2:parameters_in:bias"
///          and "result:0:activation_out".  Volumes are whatever
///          getTensorVolume reports for the corresponding tensor type.
template <class T>
std::map<std::string, uint64_t> getConv2dStatistics(T *o, uint64_t groups) {

  std::map<std::string, uint64_t> toReturn;

  // Guard the division below: a group count of 0 is treated as ungrouped.
  const uint64_t effective_groups = groups == 0 ? 1 : groups;

  TensorType resultTy = o->getResult().getType().template cast<TensorType>();
  TensorType inputTy = o->input().getType().template cast<TensorType>();
  TensorType weightTy = o->weight().getType().template cast<TensorType>();
  TensorType biasTy = o->bias().getType().template cast<TensorType>();

  uint64_t ofm_volume = getTensorVolume(resultTy);

  uint64_t ifm_depth = inputTy.getShape()[1];
  uint64_t kernel_height = weightTy.getShape()[2];
  uint64_t kernel_width = weightTy.getShape()[3];

  // Number of forward MACs per output element =
  //  kernel_width * kernel_height * ifm_depth / groups
  // (integer division, so a depth that is not a multiple of the group count
  // is rounded down).
  uint64_t MACs_per_OFM =
      (ifm_depth / effective_groups) * kernel_height * kernel_width;
  uint64_t total_MACs = ofm_volume * MACs_per_OFM;

  uint64_t ifm_volume = getTensorVolume(inputTy);
  uint64_t weight_volume = getTensorVolume(weightTy);
  uint64_t bias_volume = getTensorVolume(biasTy);

  // One addition per output element for the bias.
  // TODO: should be gated on whether there is a bias at all.
  toReturn["ops:+"] = ofm_volume;
  toReturn["ops:MAC"] = total_MACs;

  toReturn["operand:0:activation_in"] = ifm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;
  toReturn["operand:1:parameters_in:weight"] = weight_volume;
  toReturn["operand:2:parameters_in:bias"] = bias_volume;

  // Every operand is read once and the result is written once.
  toReturn["reads"] = weight_volume + bias_volume + ifm_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

/// Return the op statistics for conv2dBackward-like operations.
///
/// Operand 0 is read as the incoming loss, operand 1 as the forward
/// activation and operand 2 as the weight.  Result 0 is the outgoing loss
/// (dx) and result 2 the bias gradient (db); the weight gradient (dw,
/// result 1) is accounted for with the volume of the weight operand.
///
/// \param op      The backward-convolution-like operation to analyse.
/// \param groups  Number of convolution groups.  A value of 0 is treated as 1
///                so that the per-element MAC count never divides by zero.
/// \returns A map from statistic name to count.  Keys: "ops:MAC", "ops:+",
///          "reads", "writes", "operand:0:activation_in",
///          "operand:1:activation_in", "operand:2:parameters_in:weight",
///          "result:0:grad:dx", "result:1:grad:dw" and "result:2:grad:db".
template <typename T>
std::map<std::string, uint64_t> getConv2dBackwardStatistics(T op,
                                                            uint64_t groups) {

  std::map<std::string, uint64_t> toReturn;

  // Guard the division below: a group count of 0 is treated as ungrouped.
  const uint64_t effective_groups = groups == 0 ? 1 : groups;

  TensorType dx_out_resultTy =
      op.getResult(0).getType().template cast<TensorType>();
  uint64_t dx_out_volume = getTensorVolume(dx_out_resultTy);

  TensorType weightTy = op.getOperand(2).getType().template cast<TensorType>();
  uint64_t weight_volume = getTensorVolume(weightTy);
  uint64_t loss_in_depth = weightTy.getShape()[0];
  // Weight dimension 2 is the kernel height and dimension 3 the kernel
  // width, matching the naming used by getConv2dStatistics.  Only their
  // product is used, so the result does not depend on the labelling.
  uint64_t kernel_height = weightTy.getShape()[2];
  uint64_t kernel_width = weightTy.getShape()[3];

  // MACs needed to produce each element of the outgoing loss (dx).
  uint64_t MACs_per_loss =
      (loss_in_depth / effective_groups) * kernel_height * kernel_width;

  uint64_t total_MACs = dx_out_volume * MACs_per_loss;

  TensorType ifmTy = op.getOperand(1).getType().template cast<TensorType>();
  uint64_t ifm_volume = getTensorVolume(ifmTy);
  auto ifm_shape = ifmTy.getShape();

  // MACs for the weight gradient (dw).
  uint64_t ifm_bwh = ifm_shape[0] * ifm_shape[2] *
                     ifm_shape[3]; // Batch * height * width: the depth is in
                                   // the weight shape already
  total_MACs += ifm_bwh * weight_volume;

  TensorType dx_inTy = op.getOperand(0).getType().template cast<TensorType>();
  uint64_t dx_in_volume = getTensorVolume(dx_inTy);
  toReturn["ops:+"] = dx_in_volume;

  // Reads: Conv_backward reads 3 tensors: the loss in, the activation in and
  // the transposed weights
  toReturn["reads"] = dx_in_volume + ifm_volume + weight_volume;

  // Writes: Conv_backward writes 3 tensors: the loss out, gradients for the
  // weights, and gradients for the biases
  TensorType biasTy = op.getResult(2).getType().template cast<TensorType>();
  uint64_t bias_volume = getTensorVolume(biasTy);
  toReturn["writes"] = dx_out_volume + weight_volume + bias_volume;

  toReturn["ops:MAC"] = total_MACs;
  toReturn["operand:0:activation_in"] = dx_in_volume;
  toReturn["operand:1:activation_in"] = ifm_volume;
  toReturn["operand:2:parameters_in:weight"] = weight_volume;

  toReturn["result:0:grad:dx"] = dx_out_volume;
  toReturn["result:1:grad:dw"] = weight_volume;
  toReturn["result:2:grad:db"] = bias_volume;

  return toReturn;
}

/// Return a model of the transfer volume needed to read the operand with the
/// given index of a convolution-like operation.  The shape is assumed to be
/// in NCHW order with a simple tiled model of data reuse.
/// TODO: convert this to a target-specific interface.
///
/// The model uses fixed tile parameters: 4 batch entries, 32 output channels
/// and 4 input rows per tile.
///
/// \param o     The convolution-like operation; must expose `input()`,
///              `weight()`, `getResult()` and `getOperand(idx)`.
/// \param idx   Operand index.  0 is scaled by the input feature-map
///              overhead, 1 by the weight overhead; any other index returns
///              the plain tensor volume.
/// \param read  When false the function returns 0 (only reads are modelled).
/// \returns The operand's tensor volume scaled by the modelled re-read
///          factor, truncated to an integer.
template <class T>
uint64_t getConv2dOperandTransferVolume(T *o, unsigned int idx, bool read) {

  if (!read)
    return 0;

  const double vol = getTensorVolume(o->getOperand(idx).getType());

  TensorType inputTy = o->input().getType().template cast<TensorType>();
  TensorType weightTy = o->weight().getType().template cast<TensorType>();
  TensorType resultTy = o->getResult().getType().template cast<TensorType>();

  // NOTE(review): with the NCHW-style layout assumed here (input height is
  // dimension 2, see `ih` below) weight dimension 2 would be the kernel
  // height and dimension 3 the width, i.e. these two names look transposed.
  // The indices are kept as they were so that reported numbers do not change;
  // the two only differ for non-square kernels.  TODO confirm intent.
  const float filter_width = weightTy.getShape()[2];
  const float filter_height = weightTy.getShape()[3];

  const float batch_sw = inputTy.getShape()[0];
  const float ih = inputTy.getShape()[2];

  const float ofm_depth_sw = resultTy.getShape()[1];

  // Fixed tile parameters of the modelled target.
  const float batch_hw = 4;
  const float ofm_depth_hw = 32;
  const float ifm_tile_height = 4;

  // Rows shared between vertically adjacent tiles.
  const float ifm_overlap = std::ceil(filter_height / 2.0f);
  // Rows of new data per tile.  Clamped to one row: for kernel extents of 7
  // and above the unclamped value is zero or negative, which used to yield an
  // infinite or negative overhead and an undefined float-to-integer
  // conversion on return.
  const float ifm_aperture = std::max(ifm_tile_height - ifm_overlap, 1.0f);

  // Number of passes over the batch and over the output channels.
  const float bl = std::ceil(batch_sw / batch_hw);
  const float ol = std::ceil(ofm_depth_sw / ofm_depth_hw);

  // By default the input is re-read once per output-channel pass and the
  // weights are read once.
  float ifm_overhead = ol;
  float weight_overhead = 1.0f;
  if (filter_width > 1) {
    // The tiled estimate divides by the input height, so it is only applied
    // when that height is positive; otherwise the per-pass default is kept.
    if (ih > 0.0f) {
      // Clamped at zero so an input shorter than the overlap cannot produce
      // a negative overhead.
      const float fresh_rows = std::max(ih - ifm_overlap, 0.0f);
      ifm_overhead =
          ol * ifm_tile_height * (fresh_rows / (ih * ifm_aperture));
    }
    // Weights are re-read once per batch pass.
    weight_overhead = bl;
  }

  if (idx == 0) {
    LLVM_DEBUG(llvm::outs() << "ifm_overhead:" << ifm_overhead << "\n");
    return vol * ifm_overhead;
  }
  if (idx == 1) {
    LLVM_DEBUG(llvm::outs() << "weight_overhead:" << weight_overhead << "\n");
    return vol * weight_overhead;
  }
  return vol;
}

// Return a model of the number of bytes needed to represent the result of
// the given convolution-like operation with the given index.  The shape is
// assumed to be in NCHW order with a simple tiled model of data reuse.  TODO:
// convert this to a target-specific interface.
template <class T>
uint64_t getConv2dResultTransferVolume(T *o, unsigned int idx, bool write) {

  TensorType inputTy = o->input().getType().template cast<TensorType>();
  TensorType resultTy = o->getResult().getType().template cast<TensorType>();
  TensorType weightTy = o->weight().getType().template cast<TensorType>();
  float filter_width = weightTy.getShape()[2];
  // float filter_height = weightTy.getShape()[3];

  float ifm_depth_sw = inputTy.getShape()[1];
  const float ifm_depth_hw = 32;

  float il = ceilf(ifm_depth_sw / ifm_depth_hw);

  float write_output_overhead = 1.0f;
  float read_output_cost = 1.0f;

  if (filter_width > 1) {
    write_output_overhead = il;
    read_output_cost = il;
  }

  double vol = getTensorVolume(resultTy);

  if (write) {
    LLVM_DEBUG(llvm::outs()
               << "write_output_overhead:" << write_output_overhead << "\n");
    return vol * write_output_overhead;
  } else {
    LLVM_DEBUG(llvm::outs() << "read_output_cost:" << read_output_cost << "\n");
    return vol * read_output_cost;
  }
}

// Return the op statistics for matrixmultiply-like operations.
template <typename T> std::map<std::string, uint64_t> getMMOpStatistics(T op) {

  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = op.getResult().getType().template cast<TensorType>();
  uint64_t ofm_volume = getTensorVolume(resultTy);

  // Use the weight tensor to find the number of input neurons
  TensorType lossTy = op.getOperand(0).getType().template cast<TensorType>();
  TensorType weightTy = op.getOperand(1).getType().template cast<TensorType>();
  uint64_t num_input_neurons = weightTy.getShape()[0];
  uint64_t total_MACs = ofm_volume * num_input_neurons;
  toReturn["ops:MAC"] = total_MACs;

  uint64_t loss_in_volume = getTensorVolume(lossTy);
  uint64_t weight_volume = getTensorVolume(weightTy);
  toReturn["reads"] = loss_in_volume + weight_volume;
  toReturn["writes"] = ofm_volume;

  toReturn["operand:0:activation_in"] = loss_in_volume;
  toReturn["operand:1:activation_in"] = weight_volume;
  toReturn["result:0:activation_out"] = ofm_volume;
  return toReturn;
}

// Return the op statistics for ReLU-like operations.
template <typename T>
std::map<std::string, uint64_t> getReLUOpStatistics(T op) {

  std::map<std::string, uint64_t> toReturn;

  TensorType inputTy = op.getOperand().getType().template cast<TensorType>();
  TensorType resultTy = op.getResult().getType().template cast<TensorType>();

  uint64_t in_volume = getTensorVolume(inputTy);
  uint64_t out_volume = getTensorVolume(resultTy);

  toReturn["operand:0:activation_in"] = in_volume;
  toReturn["result:0:activation_out"] = out_volume;
  toReturn["reads"] = in_volume;
  toReturn["writes"] = out_volume;
  toReturn["ops:>"] = out_volume;

  return toReturn;
}

} // namespace aten
} // namespace NPCOMP
} // namespace mlir

#endif
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00			`//===- ATenOpStatisticsUtils.h ----------------------------------- C++ --===//`
			`//`
			`// This file is licensed under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#ifndef NPCOMP_DIALECT_ATEN_OPSTATISTICSUTILS_H`
			`#define NPCOMP_DIALECT_ATEN_OPSTATISTICSUTILS_H`

			`#include "npcomp/Dialect/ATen/ATenDialect.h"`

			`#include "mlir/IR/StandardTypes.h"`
			`#include "mlir/IR/Types.h"`
Format sources. 2020-08-28 05:47:49 +08:00			`#include "llvm/Support/Debug.h"`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00
			`#define DEBUG_TYPE "aten-op-stats"`

			`/// This file generally contains utility methods that factor common code`
			`/// out from operations that implement Statisticsopinterface.`

			`namespace mlir {`
			`namespace NPCOMP {`
			`namespace aten {`

			`// Return the op statistics for conv2d-like operations.`
Format sources. 2020-08-28 05:47:49 +08:00			`template <class T>`
			`std::map<std::string, uint64_t> getConv2dStatistics(T *o, uint64_t groups) {`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00
			`std::map<std::string, uint64_t> toReturn;`

			`TensorType resultTy = o->getResult().getType().template cast<TensorType>();`
			`TensorType inputTy = o->input().getType().template cast<TensorType>();`
			`TensorType weightTy = o->weight().getType().template cast<TensorType>();`
			`TensorType biasTy = o->bias().getType().template cast<TensorType>();`

			`uint64_t ofm_volume = getTensorVolume(resultTy);`
			`uint64_t ofm_depth = resultTy.getShape()[1];`

			`uint64_t ifm_depth = inputTy.getShape()[1];`
			`uint64_t kernel_height = weightTy.getShape()[2];`
			`uint64_t kernel_width = weightTy.getShape()[3];`

			`// Number of forward MACs per pixel =`
			`// kernel_width * kernel_height * ifm_depth / groups`
			`uint64_t MACs_per_OFM = (ifm_depth / groups) * kernel_height * kernel_width;`
			`uint64_t total_MACs = ofm_volume * MACs_per_OFM;`

			`uint64_t ifm_volume = getTensorVolume(inputTy);`
			`uint64_t weight_volume = getTensorVolume(weightTy);`
			`uint64_t bias_volume = getTensorVolume(biasTy);`

			`// Should be gated on whether there is bias at all`
			`toReturn["ops:+"] = ofm_volume;`
			`toReturn["ops:MAC"] = total_MACs;`

			`toReturn["operand:0:activation_in"] = ifm_volume;`
			`toReturn["result:0:activation_out"] = ofm_volume;`
			`toReturn["operand:1:parameters_in:weight"] = weight_volume;`
			`toReturn["operand:2:parameters_in:bias"] = bias_volume;`

			`toReturn["reads"] = weight_volume + bias_volume + ifm_volume;`
			`toReturn["writes"] = ofm_volume;`

			`return toReturn;`
			`}`

			`// Return the op statistics for conv2dBackward-like operations.`
Format sources. 2020-08-28 05:47:49 +08:00			`template <typename T>`
			`std::map<std::string, uint64_t> getConv2dBackwardStatistics(T op,`
			`uint64_t groups) {`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00
			`std::map<std::string, uint64_t> toReturn;`
Format sources. 2020-08-28 05:47:49 +08:00			`TensorType dx_out_resultTy =`
			`op.getResult(0).getType().template cast<TensorType>();`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00			`uint64_t dx_out_volume = getTensorVolume(dx_out_resultTy);`

			`TensorType weightTy = op.getOperand(2).getType().template cast<TensorType>();`
			`uint64_t weight_volume = getTensorVolume(weightTy);`
			`uint64_t loss_in_depth = weightTy.getShape()[0];`
			`uint64_t kernel_width = weightTy.getShape()[2];`
			`uint64_t kernel_height = weightTy.getShape()[3];`

Format sources. 2020-08-28 05:47:49 +08:00			`uint64_t MACs_per_loss =`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00			`(loss_in_depth / groups) * kernel_height * kernel_width;`

			`uint64_t total_MACs = dx_out_volume * MACs_per_loss;`

			`TensorType ifmTy = op.getOperand(1).getType().template cast<TensorType>();`
			`uint64_t ifm_volume = getTensorVolume(ifmTy);`
			`auto ifm_shape = ifmTy.getShape();`

			`uint64_t ifm_bwh = ifm_shape[0] * ifm_shape[2] *`
			`ifm_shape[3]; // Batch * height * width: the depth is in`
			`// the weight shape already`
			`total_MACs += ifm_bwh * weight_volume;`

			`TensorType dx_inTy = op.getOperand(0).getType().template cast<TensorType>();`
			`uint64_t dx_in_volume = getTensorVolume(dx_inTy);`
			`toReturn["ops:+"] = dx_in_volume;`

			`// Reads: Conv_backward reads 3 tensors: the loss in, the activation in and`
			`// the transposed weights`
			`toReturn["reads"] = dx_in_volume + ifm_volume + weight_volume;`

			`// Writes: Conv_backward writes 3 tensors: the loss out, gradients for the`
			`// weights, and gradients for the biases`
			`TensorType biasTy = op.getResult(2).getType().template cast<TensorType>();`
			`uint64_t bias_volume = getTensorVolume(biasTy);`
			`toReturn["writes"] = dx_out_volume + weight_volume + bias_volume;`

			`toReturn["ops:MAC"] = total_MACs;`
			`toReturn["operand:0:activation_in"] = dx_in_volume;`
			`toReturn["operand:1:activation_in"] = ifm_volume;`
			`toReturn["operand:2:parameters_in:weight"] = weight_volume;`

			`toReturn["result:0:grad:dx"] = dx_out_volume;`
			`toReturn["result:1:grad:dw"] = weight_volume;`
			`toReturn["result:2:grad:db"] = bias_volume;`

			`return toReturn;`
			`}`

			`// Return a model of the number of bytes needed to represent the operand of`
			`// the given convolution-like operation with the given index. The shape is`
			`// assumed to be in NCHW order with a simple tiled model of data reuse. TODO:`
			`// convert this to a target-specific interface.`
			`template <class T>`
			`uint64_t getConv2dOperandTransferVolume(T *o, unsigned int idx, bool read) {`

			`if (!read)`
			`return 0;`

			`double vol = getTensorVolume(o->getOperand(idx).getType());`

			`TensorType inputTy = o->input().getType().template cast<TensorType>();`
			`TensorType weightTy = o->weight().getType().template cast<TensorType>();`
			`TensorType resultTy = o->getResult().getType().template cast<TensorType>();`

			`float filter_width = weightTy.getShape()[2];`
			`float filter_height = weightTy.getShape()[3];`

			`float batch_sw = inputTy.getShape()[0];`
			`float ifm_depth_sw = inputTy.getShape()[1];`
			`float ih = inputTy.getShape()[2];`
			`float iw = inputTy.getShape()[3];`

			`float ofm_depth_sw = resultTy.getShape()[1];`

			`const float batch_hw = 4;`
			`const float ifm_depth_hw = 32;`
			`const float ofm_depth_hw = 32;`

			`const float ifm_tile_height = 4;`
			`const float ifm_tile_width = 4;`
			`const float ofm_tile_height = 4;`
			`const float ofm_tile_width = 4;`

			`float ifm_aperture = ifm_tile_height - ceilf(filter_height / 2.0f);`
			`float ifm_overlap = ceilf(filter_height / 2.0f);`

			`float bl = ceilf(batch_sw / batch_hw);`
			`float ol = ceilf(ofm_depth_sw / ofm_depth_hw);`
			`float il = ceilf(ifm_depth_sw / ifm_depth_hw);`

			`float ifm_overhead = 1.0f;`
			`float weight_overhead = 1.0f;`
			`if (filter_width > 1) {`
			`ifm_overhead =`
			`ol * ifm_tile_height * ((ih - ifm_overlap) / (ih * ifm_aperture));`
			`weight_overhead = bl;`
			`} else {`
			`ifm_overhead = ol;`
			`}`

			`if (idx == 0) {`
			`LLVM_DEBUG(llvm::outs() << "ifm_overhead:" << ifm_overhead << "\n");`
			`return vol * ifm_overhead;`
			`}`
			`if (idx == 1) {`
			`LLVM_DEBUG(llvm::outs() << "weight_overhead:" << weight_overhead << "\n");`
			`return vol * weight_overhead;`
			`}`
			`return vol;`
			`}`

			`// Return a model of the number of bytes needed to represent the result of`
			`// the given convolution-like operation with the given index. The shape is`
			`// assumed to be in NCHW order with a simple tiled model of data reuse. TODO:`
			`// convert this to a target-specific interface.`
			`template <class T>`
			`uint64_t getConv2dResultTransferVolume(T *o, unsigned int idx, bool write) {`

			`TensorType inputTy = o->input().getType().template cast<TensorType>();`
			`TensorType resultTy = o->getResult().getType().template cast<TensorType>();`
			`TensorType weightTy = o->weight().getType().template cast<TensorType>();`
			`float filter_width = weightTy.getShape()[2];`
			`// float filter_height = weightTy.getShape()[3];`

			`float ifm_depth_sw = inputTy.getShape()[1];`
			`const float ifm_depth_hw = 32;`

			`float il = ceilf(ifm_depth_sw / ifm_depth_hw);`

			`float write_output_overhead = 1.0f;`
			`float read_output_cost = 1.0f;`

			`if (filter_width > 1) {`
			`write_output_overhead = il;`
			`read_output_cost = il;`
			`}`

			`double vol = getTensorVolume(resultTy);`

			`if (write) {`
			`LLVM_DEBUG(llvm::outs()`
			`<< "write_output_overhead:" << write_output_overhead << "\n");`
			`return vol * write_output_overhead;`
			`} else {`
			`LLVM_DEBUG(llvm::outs() << "read_output_cost:" << read_output_cost << "\n");`
			`return vol * read_output_cost;`
			`}`
			`}`

			`// Return the op statistics for matrixmultiply-like operations.`
Format sources. 2020-08-28 05:47:49 +08:00			`template <typename T> std::map<std::string, uint64_t> getMMOpStatistics(T op) {`
Add ATen Dialect (#16) This patch adds a dialect intended to be used as a frontend dialect to facilitate lowering from "A Tensor Library" in torch/pytorch. This patch includes several passes that are useful in conjuction with the dialect: --aten-layer-name: Generates layer names for each operation, which are not present in the original pytorch. --aten-to-std: Lower the ATen dialect into standard dialect function calls. --return-elimination-pass: convert functions (primarily the toplevel function) to pass return values by reference. This simplifies pytorch integration. --aten-op-report: generate a textual report about the model --liveness-report Future patches will implement actual integration with the pytorch jit to intercept and generates MLIR in this dialect, then lower the resulting MLIR into function calls through aten-layer-name -> aten-to-std -> return-elimination -> std-to-llvm. The result would then jitted using the LLVM jit, linked against a runtime library which makes calls back into pytorch to implement all the layers. Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> Co-authored-by: Jeff Fifield <jeff.fifield@xilinx.com> 2020-08-13 10:28:04 +08:00
			`std::map<std::string, uint64_t> toReturn;`

			`TensorType resultTy = op.getResult().getType().template cast<TensorType>();`
			`uint64_t ofm_volume = getTensorVolume(resultTy);`

			`// Use the weight tensor to find the number of input neurons`
			`TensorType lossTy = op.getOperand(0).getType().template cast<TensorType>();`
			`TensorType weightTy = op.getOperand(1).getType().template cast<TensorType>();`
			`uint64_t num_input_neurons = weightTy.getShape()[0];`
			`uint64_t total_MACs = ofm_volume * num_input_neurons;`
			`toReturn["ops:MAC"] = total_MACs;`

			`uint64_t loss_in_volume = getTensorVolume(lossTy);`
			`uint64_t weight_volume = getTensorVolume(weightTy);`
			`toReturn["reads"] = loss_in_volume + weight_volume;`
			`toReturn["writes"] = ofm_volume;`

			`toReturn["operand:0:activation_in"] = loss_in_volume;`
			`toReturn["operand:1:activation_in"] = weight_volume;`
			`toReturn["result:0:activation_out"] = ofm_volume;`
			`return toReturn;`
			`}`

			`// Return the op statistics for ReLU-like operations.`
			`template <typename T>`
			`std::map<std::string, uint64_t> getReLUOpStatistics(T op) {`

			`std::map<std::string, uint64_t> toReturn;`

			`TensorType inputTy = op.getOperand().getType().template cast<TensorType>();`
			`TensorType resultTy = op.getResult().getType().template cast<TensorType>();`

			`uint64_t in_volume = getTensorVolume(inputTy);`
			`uint64_t out_volume = getTensorVolume(resultTy);`

			`toReturn["operand:0:activation_in"] = in_volume;`
			`toReturn["result:0:activation_out"] = out_volume;`
			`toReturn["reads"] = in_volume;`
			`toReturn["writes"] = out_volume;`
			`toReturn["ops:>"] = out_volume;`

			`return toReturn;`
			`}`

			`} // namespace aten`
			`} // namespace NPCOMP`
			`} // namespace mlir`

			`#endif`