2020-08-13 10:28:04 +08:00
|
|
|
//===- ATenDialectOpStats.cpp -----------------------------------*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2020-10-23 05:13:26 +08:00
|
|
|
#include "npcomp/Dialect/ATen/IR/ATenDialect.h"
|
|
|
|
#include "npcomp/Dialect/ATen/IR/ATenOpStatisticsUtils.h"
|
2020-08-13 10:28:04 +08:00
|
|
|
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
|
2020-12-12 06:43:38 +08:00
|
|
|
#include "mlir/IR/BuiltinTypes.h"
|
2020-08-13 10:28:04 +08:00
|
|
|
#include "mlir/IR/Types.h"
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "aten-op-stats"
|
|
|
|
|
|
|
|
// This file contains the StatisticsOpInterface implementations
|
|
|
|
// for ATen dialect operations
|
|
|
|
|
|
|
|
using namespace mlir;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Unpacks the integer elements stored in the "value" attribute of the aten
// ConstantOp that defines `op`. Each element is sign-extended and then stored
// as uint64_t. The cast<> assumes `op` is defined by an aten::ConstantOp.
std::vector<uint64_t> unpackListConstant(Value op) {
  auto constantOp = cast<mlir::NPCOMP::aten::ConstantOp>(op.getDefiningOp());
  DenseElementsAttr elements =
      constantOp->template getAttrOfType<DenseElementsAttr>("value");

  std::vector<uint64_t> unpacked;
  for (auto element : elements.getIntValues())
    unpacked.push_back(element.getSExtValue());
  return unpacked;
}
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
namespace mlir {
|
|
|
|
namespace NPCOMP {
|
|
|
|
namespace aten {
|
|
|
|
|
|
|
|
// Placeholder statistics for adaptive_avg_pool2d. Both "reads" and "writes"
// are set to -1 converted to uint64_t to mark them as not yet computed.
std::map<std::string, uint64_t> AdaptiveAvgPool2dOp::getStatistics() {
  // FIXME: unimplemented
  const uint64_t unimplemented = static_cast<uint64_t>(-1);
  return {{"reads", unimplemented}, {"writes", unimplemented}};
}
|
|
|
|
// Placeholder statistics for the adaptive_avg_pool2d backward op. "reads" and
// "writes" both carry -1 converted to uint64_t as an "unknown" marker.
std::map<std::string, uint64_t> AdaptiveAvgPool2dBackwardOp::getStatistics() {
  // FIXME: unimplemented
  std::map<std::string, uint64_t> stats;
  for (const char *key : {"reads", "writes"})
    stats[key] = static_cast<uint64_t>(-1);
  return stats;
}
|
|
|
|
|
|
|
|
// add_
|
|
|
|
std::map<std::string, uint64_t> AddUnderOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
Type bType = getOperand(1).getType();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
toReturn["ops:+"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
uint64_t b_volume = getTensorVolume(bType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = b_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume + b_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// addmm
|
|
|
|
std::map<std::string, uint64_t> AddmmOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
// For linear, we need the number of output neurons and the number of input
|
|
|
|
// neurons Then the number of forward MACs is input * output And the number of
|
|
|
|
// adds is output if there is bias
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType biasTy = getOperand(0).getType().cast<TensorType>();
|
|
|
|
TensorType inputTy = getOperand(1).getType().cast<TensorType>();
|
|
|
|
TensorType weightTy = getOperand(2).getType().cast<TensorType>();
|
|
|
|
|
|
|
|
uint64_t num_output_neurons = resultTy.getShape()[1];
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
// Use the weight tensor to find the number of input neurons
|
|
|
|
uint64_t num_input_neurons = weightTy.getShape()[0];
|
|
|
|
uint64_t total_MACs = ofm_volume * num_input_neurons;
|
|
|
|
uint64_t weight_volume = getTensorVolume(weightTy);
|
|
|
|
|
|
|
|
uint64_t ifm_volume = getTensorVolume(inputTy);
|
|
|
|
|
|
|
|
toReturn["ops:MAC"] = total_MACs;
|
|
|
|
toReturn["ops:+"] =
|
|
|
|
ofm_volume; // Should be gated on whether there is bias at all
|
|
|
|
toReturn["operand:1:activation_in"] = ifm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
toReturn["operand:0:parameters_in:bias"] = getTensorVolume(biasTy);
|
|
|
|
toReturn["operand:2:parameters_in:weight"] = weight_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = ifm_volume + weight_volume + num_output_neurons;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// as_strided can be zero overhead
|
|
|
|
// as_strided is modelled as zero overhead: every reported statistic is 0.
std::map<std::string, uint64_t> AsStridedOp::getStatistics() {
  return {{"reads", 0},
          {"writes", 0},
          {"operand:0:activation_in", 0},
          {"result:0:activation_out", 0}};
}
|
|
|
|
|
|
|
|
// batch_norm
|
|
|
|
std::map<std::string, uint64_t> BatchNormOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult(0).getType().cast<TensorType>();
|
|
|
|
uint64_t op_volume = getTensorVolume(resultTy);
|
|
|
|
uint64_t weight_volume = getTensorVolume(getOperand(1).getType());
|
|
|
|
uint64_t bias_volume = getTensorVolume(getOperand(2).getType());
|
|
|
|
toReturn["operand:0:activation_in"] = op_volume;
|
|
|
|
toReturn["result:0:activation_out"] = op_volume;
|
|
|
|
toReturn["operand:1:parameters_in:weight"] = weight_volume;
|
|
|
|
toReturn["operand:2:parameters_in:bias"] = bias_volume;
|
|
|
|
|
|
|
|
// Now for the arithmetic. Assume variance is calculated as sum of squares
|
|
|
|
uint64_t ifm_depth = resultTy.getShape()[1];
|
|
|
|
|
|
|
|
toReturn["ops:+"] = op_volume; // Add up for mean
|
|
|
|
toReturn["ops:*"] = op_volume; // Square for variance
|
|
|
|
toReturn["ops:+"] += op_volume; // Add up squares for variance
|
|
|
|
|
|
|
|
toReturn["ops:*"] += ifm_depth; // Calc channel means
|
|
|
|
toReturn["ops:-"] += ifm_depth; // Calc channel vars
|
|
|
|
toReturn["ops:*"] += ifm_depth; // Calc channel vars
|
|
|
|
|
|
|
|
toReturn["ops:sqrt"] = ifm_depth; // Convert to SD
|
|
|
|
toReturn["ops:/"] = ifm_depth; // Get the reciprocal
|
|
|
|
|
|
|
|
toReturn["ops:+"] += op_volume; // Subtract mean off each pixel
|
|
|
|
toReturn["ops:*"] += op_volume; // Multiply by 1/SD for each pixel
|
|
|
|
|
|
|
|
toReturn["ops:+"] += op_volume; // Bias
|
|
|
|
toReturn["ops:*"] += op_volume; // Scale
|
|
|
|
|
|
|
|
toReturn["reads"] = op_volume + weight_volume + bias_volume;
|
|
|
|
toReturn["writes"] = op_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// div_
|
|
|
|
std::map<std::string, uint64_t> DivUnderOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
Type bType = getOperand(1).getType();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
toReturn["ops:/"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
uint64_t b_volume = getTensorVolume(bType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = b_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume + b_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// expand can be zero overhead
|
|
|
|
// expand is modelled as zero overhead: reads, writes and the operand/result
// volumes are all reported as 0.
std::map<std::string, uint64_t> ExpandOp::getStatistics() {
  std::map<std::string, uint64_t> stats;
  stats["operand:0:activation_in"] = 0;
  stats["reads"] = 0;
  stats["result:0:activation_out"] = 0;
  stats["writes"] = 0;
  return stats;
}
|
|
|
|
|
|
|
|
// flatten can be zero overhead
|
|
|
|
// flatten is modelled as zero overhead: every reported statistic is 0.
std::map<std::string, uint64_t> FlattenOp::getStatistics() {
  std::map<std::string, uint64_t> stats;
  for (const char *key : {"reads", "operand:0:activation_in", "writes",
                          "result:0:activation_out"})
    stats[key] = 0;
  return stats;
}
|
|
|
|
|
|
|
|
// Placeholder statistics for gather. "reads" and "writes" are set to -1
// converted to uint64_t to flag that they have not been computed.
std::map<std::string, uint64_t> GatherOp::getStatistics() {
  // FIXME: unimplemented
  const uint64_t unknown = static_cast<uint64_t>(-1);
  std::map<std::string, uint64_t> stats;
  stats["writes"] = unknown;
  stats["reads"] = unknown;
  return stats;
}
|
|
|
|
|
|
|
|
// hardtanh
|
|
|
|
std::map<std::string, uint64_t> HardtanhOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType inputTy = getOperand(0).getType().cast<TensorType>();
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
|
|
|
|
uint64_t in_volume = getTensorVolume(inputTy);
|
|
|
|
uint64_t out_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = in_volume;
|
|
|
|
toReturn["result:0:activation_out"] = out_volume;
|
|
|
|
toReturn["reads"] = in_volume;
|
|
|
|
toReturn["writes"] = out_volume;
|
|
|
|
toReturn["ops:>"] = out_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// hardtanh_
|
|
|
|
std::map<std::string, uint64_t> HardtanhUnderOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType inputTy = getOperand(0).getType().cast<TensorType>();
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
|
|
|
|
uint64_t in_volume = getTensorVolume(inputTy);
|
|
|
|
uint64_t out_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = in_volume;
|
|
|
|
toReturn["result:0:activation_out"] = out_volume;
|
|
|
|
toReturn["reads"] = in_volume;
|
|
|
|
toReturn["writes"] = out_volume;
|
|
|
|
toReturn["ops:>"] = out_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Statistics for the hardtanh backward op are not implemented; an empty map
// is returned.
std::map<std::string, uint64_t> HardtanhBackwardOp::getStatistics() {
  // FIXME: unimplemented
  return std::map<std::string, uint64_t>();
}
|
|
|
|
|
|
|
|
// max_pool2d
|
|
|
|
std::map<std::string, uint64_t> MaxPool2dOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType inputType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
uint64_t ifm_volume = getTensorVolume(inputType);
|
|
|
|
toReturn["input:0:activation_in"] = ifm_volume;
|
|
|
|
|
|
|
|
// To find the number of compares, we need the filter extent
|
|
|
|
|
|
|
|
std::vector<uint64_t> kernel_size = unpackListConstant(getOperand(1));
|
|
|
|
|
|
|
|
uint64_t aperture = kernel_size[0] * kernel_size[1];
|
|
|
|
toReturn["ops:>"] = ofm_volume * (aperture - 1);
|
|
|
|
|
|
|
|
toReturn["reads"] = ifm_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// max_pool2d_with_indices
|
|
|
|
std::map<std::string, uint64_t> MaxPool2dWithIndicesOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
uint64_t ofm_volume =
|
|
|
|
getTensorVolume(getResult(0).getType().cast<TensorType>());
|
|
|
|
uint64_t indices_volume =
|
|
|
|
getTensorVolume(getResult(1).getType().cast<TensorType>());
|
|
|
|
|
|
|
|
toReturn["writes"] = ofm_volume + indices_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
toReturn["result:1:indices_out"] = indices_volume;
|
|
|
|
|
|
|
|
uint64_t ifm_volume =
|
|
|
|
getTensorVolume(getOperand(0).getType().cast<TensorType>());
|
|
|
|
toReturn["reads"] = ifm_volume;
|
|
|
|
toReturn["operand:0:activation_in"] = ifm_volume;
|
|
|
|
|
|
|
|
// To find the number of compares, we need the filter extent
|
|
|
|
|
|
|
|
std::vector<uint64_t> kernel_size = unpackListConstant(getOperand(1));
|
|
|
|
|
|
|
|
uint64_t aperture = kernel_size[0] * kernel_size[1];
|
|
|
|
toReturn["ops:>"] = ofm_volume * (aperture - 1);
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// max_pool2d_with_indices_backward
|
|
|
|
std::map<std::string, uint64_t>
|
|
|
|
MaxPool2dWithIndicesBackwardOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
Type resultTy = getResult().getType();
|
|
|
|
TensorType tensorResultTy = resultTy.cast<TensorType>();
|
|
|
|
uint64_t loss_out_volume = getTensorVolume(tensorResultTy);
|
|
|
|
toReturn["writes"] = loss_out_volume;
|
|
|
|
|
|
|
|
uint64_t loss_in_volume =
|
|
|
|
getTensorVolume(getOperand(0).getType().cast<TensorType>());
|
|
|
|
uint64_t act_in_volume = getTensorVolume(
|
|
|
|
getOperand(1).getType().cast<TensorType>()); // TODO: Why is this needed?
|
|
|
|
uint64_t indices_volume =
|
|
|
|
getTensorVolume(getOperand(7).getType().cast<TensorType>());
|
|
|
|
toReturn["reads"] = loss_in_volume + act_in_volume + indices_volume;
|
|
|
|
toReturn["operand:0:activation_in"] = loss_in_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = act_in_volume;
|
|
|
|
toReturn["operand:3:activation_in"] = indices_volume;
|
|
|
|
toReturn["result:0:grad:dx"] = loss_out_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// mean
|
|
|
|
std::map<std::string, uint64_t> MeanOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand().getType().cast<TensorType>();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
toReturn["ops:+"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// mul_
|
|
|
|
std::map<std::string, uint64_t> MulUnderOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
Type bType = getOperand(1).getType();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
toReturn["ops:*"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
uint64_t b_volume = getTensorVolume(bType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = b_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume + b_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// native_batch_norm
|
|
|
|
std::map<std::string, uint64_t> NativeBatchNormOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult(0).getType().cast<TensorType>();
|
|
|
|
uint64_t op_volume = getTensorVolume(resultTy);
|
|
|
|
uint64_t weight_volume = getTensorVolume(getOperand(1).getType());
|
|
|
|
uint64_t bias_volume = getTensorVolume(getOperand(2).getType());
|
|
|
|
toReturn["operand:0:activation_in"] = op_volume;
|
|
|
|
toReturn["result:0:activation_out"] = op_volume;
|
|
|
|
toReturn["operand:1:parameters_in:weight"] = weight_volume;
|
|
|
|
toReturn["operand:2:parameters_in:bias"] = bias_volume;
|
|
|
|
|
|
|
|
// Now for the arithmetic. Assume variance is calculated as sum of squares
|
|
|
|
uint64_t ifm_depth = resultTy.getShape()[1];
|
|
|
|
|
|
|
|
toReturn["ops:+"] = op_volume; // Add up for mean
|
|
|
|
toReturn["ops:*"] = op_volume; // Square for variance
|
|
|
|
toReturn["ops:+"] += op_volume; // Add up squares for variance
|
|
|
|
|
|
|
|
toReturn["ops:*"] += ifm_depth; // Calc channel means
|
|
|
|
toReturn["ops:-"] += ifm_depth; // Calc channel vars
|
|
|
|
toReturn["ops:*"] += ifm_depth; // Calc channel vars
|
|
|
|
|
|
|
|
toReturn["ops:sqrt"] = ifm_depth; // Convert to SD
|
|
|
|
toReturn["ops:/"] = ifm_depth; // Get the reciprocal
|
|
|
|
|
|
|
|
toReturn["ops:+"] += op_volume; // Subtract mean off each pixel
|
|
|
|
toReturn["ops:*"] += op_volume; // Multiply by 1/SD for each pixel
|
|
|
|
|
|
|
|
toReturn["ops:+"] += op_volume; // Bias
|
|
|
|
toReturn["ops:*"] += op_volume; // Scale
|
|
|
|
|
|
|
|
toReturn["reads"] = op_volume + weight_volume + bias_volume;
|
|
|
|
toReturn["writes"] = op_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// batchnorm backward
|
|
|
|
std::map<std::string, uint64_t> NativeBatchNormBackwardOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
ShapedType inputTy = getOperand(0).getType().cast<ShapedType>();
|
|
|
|
uint64_t input_volume = getTensorVolume(inputTy);
|
|
|
|
uint64_t input_channels = inputTy.getShape()[1];
|
|
|
|
|
|
|
|
// # 3 components make up the gradInput: 1 gradInput, 2 gradMean, 3 gradVar
|
|
|
|
// # totalGradInput = gradInput + (dL / dMean * dMean / dInput) +
|
|
|
|
// # (dL / dVar * dVar / dInput)
|
|
|
|
|
|
|
|
// # gradInput
|
|
|
|
// total_ops["backward"]["*"] = in_c * (in_h*in_w*batch_size) # scale
|
|
|
|
// # Bootstrap from previous
|
|
|
|
// #total_ops["backward"]["sqrt"] = in_c # Convert to std_dev
|
|
|
|
// #total_ops["backward"]["/"] = in_c # Calculate inverse sqrt first
|
|
|
|
toReturn["ops:*"] = input_volume; // scale
|
|
|
|
|
|
|
|
// # dL / dGradVar
|
|
|
|
// total_ops["backward"]["pow"] = in_c
|
|
|
|
// total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c
|
|
|
|
// #total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
|
|
|
|
// in_h*in_w*batch_size # Subtract mean, bootstrap from previous calculation
|
|
|
|
// total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c *
|
|
|
|
// (in_h*in_w*batch_size)
|
|
|
|
toReturn["ops:pow"] = input_channels;
|
|
|
|
;
|
|
|
|
toReturn["ops:*"] += input_channels;
|
|
|
|
toReturn["ops:*"] += input_volume;
|
|
|
|
|
|
|
|
// # dL / dGradMean
|
|
|
|
// #total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
|
|
|
|
// (in_h*in_w*batch_size) # bootstrap from previous total_ops["backward"]["*"]
|
|
|
|
// = total_ops["backward"]["*"] + in_c # scale gradMean
|
|
|
|
// total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # eltwise
|
|
|
|
// with dL / dGradVar total_ops["backward"]["+"] = in_c *
|
|
|
|
// (in_h*in_w*batch_size) # sum gradXhat total_ops["backward"]["*"] =
|
|
|
|
// total_ops["backward"]["*"] + in_c # scale gradXhat
|
|
|
|
toReturn["ops:*"] += input_channels; // scale gradMean
|
|
|
|
toReturn["ops:*"] += input_channels; // eltwise with dL / dGradVar
|
|
|
|
toReturn["ops:+"] = input_volume; // sum gradXhat
|
|
|
|
toReturn["ops:*"] += input_channels; // scale gradXhat
|
|
|
|
|
|
|
|
// # totalGradInput
|
|
|
|
// total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
|
|
|
|
// (in_h*in_w*batch_size) # Subtract mean, can't bootstrap this one
|
|
|
|
// total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # scale dL /
|
|
|
|
// dMean total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c #
|
|
|
|
// scale dL / dVar total_ops["backward"]["*"] = total_ops["backward"]["*"] +
|
|
|
|
// in_c * (in_h*in_w*batch_size) # Eltwise multiply by dL / dVar
|
|
|
|
// total_ops["backward"]["+"] = total_ops["backward"]["+"] + 2 * in_c *
|
|
|
|
// (in_h*in_w*batch_size) # Accumulate gradient terms
|
|
|
|
toReturn["ops:+"] += input_volume; // Subtract mean, can't bootstrap this one
|
|
|
|
toReturn["ops:*"] += input_channels; // scale dL / dMean
|
|
|
|
toReturn["ops:*"] += input_channels; // scale dL / dVar
|
|
|
|
toReturn["ops:*"] += input_volume; // Eltwise multiply by dL / dVar
|
|
|
|
toReturn["OPS:+"] += 2 * input_volume; // Accumulate gradient terms
|
|
|
|
|
|
|
|
uint64_t reads = 0;
|
|
|
|
for (int i = 0; i < 7; i++) {
|
|
|
|
auto v = getTensorVolume(getOperand(i).getType());
|
|
|
|
toReturn["operand:" + std::to_string(i) + ":activation_in"] = v;
|
|
|
|
reads += v;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t writes = 0;
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
|
|
auto v = getTensorVolume(getResult(i).getType());
|
|
|
|
toReturn["result:" + std::to_string(i) + ":grad"] = v;
|
|
|
|
writes += v;
|
|
|
|
}
|
|
|
|
|
|
|
|
toReturn["reads"] = reads;
|
|
|
|
toReturn["writes"] = writes;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// std::map<std::string, uint64_t> ReLUUnderOp::getStatistics() {
|
|
|
|
// return getReLUOpStatistics(*this);
|
|
|
|
// }
|
|
|
|
std::map<std::string, uint64_t> ReluUnderOp::getStatistics() {
|
|
|
|
return getReLUOpStatistics(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
// sub
|
|
|
|
std::map<std::string, uint64_t> SubOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
Type bType = getOperand(1).getType();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
toReturn["ops:-"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
uint64_t b_volume = getTensorVolume(bType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = b_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume + b_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// sub_
|
|
|
|
std::map<std::string, uint64_t> SubUnderOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
|
|
|
|
TensorType resultTy = getResult().getType().cast<TensorType>();
|
|
|
|
TensorType aType = getOperand(0).getType().cast<TensorType>();
|
|
|
|
Type bType = getOperand(1).getType();
|
|
|
|
|
|
|
|
uint64_t ofm_volume = getTensorVolume(resultTy);
|
|
|
|
|
|
|
|
toReturn["ops:-"] = ofm_volume;
|
|
|
|
toReturn["result:0:activation_out"] = ofm_volume;
|
|
|
|
|
|
|
|
// Find the size of the A and B operands
|
|
|
|
uint64_t a_volume = getTensorVolume(aType);
|
|
|
|
uint64_t b_volume = getTensorVolume(bType);
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = a_volume;
|
|
|
|
toReturn["operand:1:activation_in"] = b_volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = a_volume + b_volume;
|
|
|
|
toReturn["writes"] = ofm_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// sum
|
|
|
|
std::map<std::string, uint64_t> SumOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
TensorType ty = getOperand(0).getType().cast<TensorType>();
|
|
|
|
uint64_t volume = getTensorVolume(ty);
|
|
|
|
|
|
|
|
toReturn["ops:+"] = volume;
|
|
|
|
|
|
|
|
toReturn["operand:0:activation_in"] = volume;
|
|
|
|
toReturn["result:0:activation_out"] = volume;
|
|
|
|
|
|
|
|
toReturn["reads"] = volume;
|
|
|
|
toReturn["writes"] = volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// size op can be zero overhead
|
|
|
|
// size is modelled as zero overhead: every reported statistic is 0.
std::map<std::string, uint64_t> SizeOp::getStatistics() {
  return {{"operand:0:activation_in", 0},
          {"reads", 0},
          {"result:0:activation_out", 0},
          {"writes", 0}};
}
|
|
|
|
|
|
|
|
// squeeze can be zero overhead
|
|
|
|
// squeeze is modelled as zero overhead: reads, writes and the operand/result
// volumes are all 0.
std::map<std::string, uint64_t> SqueezeOp::getStatistics() {
  std::map<std::string, uint64_t> stats;
  // Input side.
  stats["operand:0:activation_in"] = 0;
  stats["reads"] = 0;
  // Output side.
  stats["result:0:activation_out"] = 0;
  stats["writes"] = 0;
  return stats;
}
|
|
|
|
|
|
|
|
// transpose can be zero overhead
|
|
|
|
// transpose (t) is modelled as zero overhead: every reported statistic is 0.
std::map<std::string, uint64_t> TOp::getStatistics() {
  const uint64_t none = 0;
  std::map<std::string, uint64_t> stats;
  stats["reads"] = none;
  stats["writes"] = none;
  stats["operand:0:activation_in"] = none;
  stats["result:0:activation_out"] = none;
  return stats;
}
|
|
|
|
|
|
|
|
// threshold_backward
|
|
|
|
std::map<std::string, uint64_t> ThresholdBackwardOp::getStatistics() {
|
|
|
|
|
|
|
|
std::map<std::string, uint64_t> toReturn;
|
|
|
|
uint64_t loss_in_volume =
|
|
|
|
getTensorVolume(getOperand(0).getType().cast<TensorType>());
|
|
|
|
uint64_t act_in_volume =
|
|
|
|
getTensorVolume(getOperand(1).getType().cast<TensorType>());
|
|
|
|
uint64_t loss_out_volume =
|
|
|
|
getTensorVolume(getResult().getType().cast<TensorType>());
|
|
|
|
|
|
|
|
toReturn["reads"] = toReturn["operand:0:activation_in"] =
|
|
|
|
loss_in_volume + act_in_volume;
|
|
|
|
toReturn["writes"] = toReturn["result:0:grad:dx"] = loss_out_volume;
|
|
|
|
|
|
|
|
return toReturn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// unsqueeze can be zero overhead
|
|
|
|
// unsqueeze is modelled as zero overhead: every reported statistic is 0.
std::map<std::string, uint64_t> UnsqueezeOp::getStatistics() {
  std::map<std::string, uint64_t> stats;
  for (const char *key : {"operand:0:activation_in", "reads",
                          "result:0:activation_out", "writes"})
    stats[key] = 0;
  return stats;
}
|
|
|
|
|
|
|
|
// view can be zero overhead
|
|
|
|
// view is modelled as zero overhead: reads, writes and the operand/result
// volumes are all reported as 0.
std::map<std::string, uint64_t> ViewOp::getStatistics() {
  return {{"reads", 0},
          {"operand:0:activation_in", 0},
          {"writes", 0},
          {"result:0:activation_out", 0}};
}
|
|
|
|
|
|
|
|
} // namespace aten
|
|
|
|
} // namespace NPCOMP
|
|
|
|
} // namespace mlir
|