//===- ATenDialectOpStats.cpp -----------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "npcomp/Dialect/ATen/IR/ATenDialect.h"
#include "npcomp/Dialect/ATen/IR/ATenOpStatisticsUtils.h"

#include "llvm/Support/Debug.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Types.h"

#include <iostream>

#define DEBUG_TYPE "aten-op-stats"

// This file contains the StatisticsOpInterface implementations
// for ATen dialect operations.

using namespace mlir;

namespace {

// Unpack an aten constant holding a DenseElementsAttr list (e.g. a kernel
// size) into a vector of integers.
std::vector<uint64_t> unpackListConstant(Value op) {
  std::vector<uint64_t> v;
  auto co = cast<mlir::NPCOMP::aten::ConstantOp>(op.getDefiningOp());
  DenseElementsAttr a = co->template getAttrOfType<DenseElementsAttr>("value");
  for (auto i : a.getIntValues())
    v.push_back(i.getSExtValue());
  return v;
}

} // namespace

namespace mlir {
namespace NPCOMP {
namespace aten {

std::map<std::string, uint64_t> AdaptiveAvgPool2dBackwardOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  // FIXME: unimplemented
  toReturn["reads"] = -1;
  toReturn["writes"] = -1;
  return toReturn;
}

// add_
std::map<std::string, uint64_t> AddUnderOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand(0).getType().cast<TensorType>();
  Type bType = getOperand(1).getType();

  uint64_t ofm_volume = getTensorVolume(resultTy);
  toReturn["ops:+"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A and B operands
  uint64_t a_volume = getTensorVolume(aType);
  uint64_t b_volume = getTensorVolume(bType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["operand:1:activation_in"] = b_volume;

  toReturn["reads"] = a_volume + b_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// addmm
std::map<std::string, uint64_t> AddmmOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  // For linear, we need the number of output neurons and the number of input
  // neurons. Then the number of forward MACs is input * output, and the number
  // of adds is output if there is a bias.

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType biasTy = getOperand(0).getType().cast<TensorType>();
  TensorType inputTy = getOperand(1).getType().cast<TensorType>();
  TensorType weightTy = getOperand(2).getType().cast<TensorType>();

  uint64_t num_output_neurons = resultTy.getShape()[1];
  uint64_t ofm_volume = getTensorVolume(resultTy);

  // Use the weight tensor to find the number of input neurons
  uint64_t num_input_neurons = weightTy.getShape()[0];
  uint64_t total_MACs = ofm_volume * num_input_neurons;
  uint64_t weight_volume = getTensorVolume(weightTy);

  uint64_t ifm_volume = getTensorVolume(inputTy);

  toReturn["ops:MAC"] = total_MACs;
  // Should be gated on whether there is a bias at all
  toReturn["ops:+"] = ofm_volume;
  toReturn["operand:1:activation_in"] = ifm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;
  toReturn["operand:0:parameters_in:bias"] = getTensorVolume(biasTy);
  toReturn["operand:2:parameters_in:weight"] = weight_volume;

  toReturn["reads"] = ifm_volume + weight_volume + num_output_neurons;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}
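// As a sanity check on the addmm bookkeeping above, a worked example with
// hypothetical shapes (not taken from any particular model): for a [4, 32]
// result, a [32] bias, a [4, 64] input, and a [64, 32] weight (mat2),
// ofm_volume = 4 * 32 = 128 and num_input_neurons = 64, so
// ops:MAC = 128 * 64 = 8192, ops:+ = 128, and
// reads = 256 (input) + 2048 (weight) + 32 (num_output_neurons) = 2336.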
// as_strided can be zero overhead
std::map<std::string, uint64_t> AsStridedOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = 0;
  toReturn["writes"] = 0;
  toReturn["operand:0:activation_in"] = 0;
  toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// div_
std::map<std::string, uint64_t> DivUnderOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand(0).getType().cast<TensorType>();
  Type bType = getOperand(1).getType();

  uint64_t ofm_volume = getTensorVolume(resultTy);
  toReturn["ops:/"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A and B operands
  uint64_t a_volume = getTensorVolume(aType);
  uint64_t b_volume = getTensorVolume(bType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["operand:1:activation_in"] = b_volume;

  toReturn["reads"] = a_volume + b_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// expand can be zero overhead
std::map<std::string, uint64_t> ExpandOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// flatten can be zero overhead
std::map<std::string, uint64_t> FlattenOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

std::map<std::string, uint64_t> GatherOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  // FIXME: unimplemented
  toReturn["reads"] = -1;
  toReturn["writes"] = -1;
  return toReturn;
}

// hardtanh
std::map<std::string, uint64_t> HardtanhOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType inputTy = getOperand(0).getType().cast<TensorType>();
  TensorType resultTy = getResult().getType().cast<TensorType>();

  uint64_t in_volume = getTensorVolume(inputTy);
  uint64_t out_volume = getTensorVolume(resultTy);

  toReturn["operand:0:activation_in"] = in_volume;
  toReturn["result:0:activation_out"] = out_volume;
  toReturn["reads"] = in_volume;
  toReturn["writes"] = out_volume;
  toReturn["ops:>"] = out_volume;

  return toReturn;
}

// hardtanh_
std::map<std::string, uint64_t> HardtanhUnderOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType inputTy = getOperand(0).getType().cast<TensorType>();
  TensorType resultTy = getResult().getType().cast<TensorType>();

  uint64_t in_volume = getTensorVolume(inputTy);
  uint64_t out_volume = getTensorVolume(resultTy);

  toReturn["operand:0:activation_in"] = in_volume;
  toReturn["result:0:activation_out"] = out_volume;
  toReturn["reads"] = in_volume;
  toReturn["writes"] = out_volume;
  toReturn["ops:>"] = out_volume;

  return toReturn;
}

std::map<std::string, uint64_t> HardtanhBackwardOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  // FIXME: unimplemented
  return toReturn;
}

// max_pool2d_with_indices
std::map<std::string, uint64_t> MaxPool2dWithIndicesOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  uint64_t ofm_volume =
      getTensorVolume(getResult(0).getType().cast<TensorType>());
  uint64_t indices_volume =
      getTensorVolume(getResult(1).getType().cast<TensorType>());

  toReturn["writes"] = ofm_volume + indices_volume;
  toReturn["result:0:activation_out"] = ofm_volume;
  toReturn["result:1:indices_out"] = indices_volume;

  uint64_t ifm_volume =
      getTensorVolume(getOperand(0).getType().cast<TensorType>());
  toReturn["reads"] = ifm_volume;
  toReturn["operand:0:activation_in"] = ifm_volume;

  // To find the number of compares, we need the filter extent
  std::vector<uint64_t> kernel_size = unpackListConstant(getOperand(1));

  uint64_t aperture = kernel_size[0] * kernel_size[1];
  toReturn["ops:>"] = ofm_volume * (aperture - 1);

  return toReturn;
}
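// Worked example with a hypothetical configuration: for a 3x3 kernel,
// aperture = 9, so each output element costs aperture - 1 = 8 compares; an
// output volume of 1x64x56x56 = 200704 elements would therefore report
// ops:> = 200704 * 8 = 1605632.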
// max_pool2d_with_indices_backward
std::map<std::string, uint64_t>
MaxPool2dWithIndicesBackwardOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  Type resultTy = getResult().getType();
  TensorType tensorResultTy = resultTy.cast<TensorType>();
  uint64_t loss_out_volume = getTensorVolume(tensorResultTy);
  toReturn["writes"] = loss_out_volume;

  uint64_t loss_in_volume =
      getTensorVolume(getOperand(0).getType().cast<TensorType>());
  uint64_t act_in_volume = getTensorVolume(
      getOperand(1).getType().cast<TensorType>()); // TODO: Why is this needed?
  uint64_t indices_volume =
      getTensorVolume(getOperand(7).getType().cast<TensorType>());

  toReturn["reads"] = loss_in_volume + act_in_volume + indices_volume;
  toReturn["operand:0:activation_in"] = loss_in_volume;
  toReturn["operand:1:activation_in"] = act_in_volume;
  toReturn["operand:3:activation_in"] = indices_volume;
  toReturn["result:0:grad:dx"] = loss_out_volume;

  return toReturn;
}

// mean
std::map<std::string, uint64_t> MeanOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand().getType().cast<TensorType>();

  uint64_t ofm_volume = getTensorVolume(resultTy);
  toReturn["ops:+"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A operand
  uint64_t a_volume = getTensorVolume(aType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["reads"] = a_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// mul_
std::map<std::string, uint64_t> MulUnderOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand(0).getType().cast<TensorType>();
  Type bType = getOperand(1).getType();

  uint64_t ofm_volume = getTensorVolume(resultTy);
  toReturn["ops:*"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A and B operands
  uint64_t a_volume = getTensorVolume(aType);
  uint64_t b_volume = getTensorVolume(bType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["operand:1:activation_in"] = b_volume;

  toReturn["reads"] = a_volume + b_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// native_batch_norm
std::map<std::string, uint64_t> NativeBatchNormOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult(0).getType().cast<TensorType>();
  uint64_t op_volume = getTensorVolume(resultTy);
  uint64_t weight_volume = getTensorVolume(getOperand(1).getType());
  uint64_t bias_volume = getTensorVolume(getOperand(2).getType());
  toReturn["operand:0:activation_in"] = op_volume;
  toReturn["result:0:activation_out"] = op_volume;
  toReturn["operand:1:parameters_in:weight"] = weight_volume;
  toReturn["operand:2:parameters_in:bias"] = bias_volume;

  // Now for the arithmetic. Assume variance is calculated as the sum of
  // squares.
  uint64_t ifm_depth = resultTy.getShape()[1];

  toReturn["ops:+"] = op_volume;  // Add up for mean
  toReturn["ops:*"] = op_volume;  // Square for variance
  toReturn["ops:+"] += op_volume; // Add up squares for variance

  toReturn["ops:*"] += ifm_depth; // Calc channel means
  toReturn["ops:-"] += ifm_depth; // Calc channel vars
  toReturn["ops:*"] += ifm_depth; // Calc channel vars

  toReturn["ops:sqrt"] = ifm_depth; // Convert to SD
  toReturn["ops:/"] = ifm_depth;    // Get the reciprocal

  toReturn["ops:+"] += op_volume; // Subtract mean off each pixel
  toReturn["ops:*"] += op_volume; // Multiply by 1/SD for each pixel

  toReturn["ops:+"] += op_volume; // Bias
  toReturn["ops:*"] += op_volume; // Scale

  toReturn["reads"] = op_volume + weight_volume + bias_volume;
  toReturn["writes"] = op_volume;

  return toReturn;
}
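// Worked example with a hypothetical 1x64x56x56 output (op_volume = 200704,
// ifm_depth = 64): the model above charges ops:+ = 4 * 200704 = 802816,
// ops:* = 3 * 200704 + 2 * 64 = 602240, ops:- = 64, ops:sqrt = 64, and
// ops:/ = 64.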
// batchnorm backward
std::map<std::string, uint64_t> NativeBatchNormBackwardOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  ShapedType inputTy = getOperand(0).getType().cast<ShapedType>();
  uint64_t input_volume = getTensorVolume(inputTy);
  uint64_t input_channels = inputTy.getShape()[1];

  // # 3 components make up the gradInput: 1 gradInput, 2 gradMean, 3 gradVar
  // # totalGradInput = gradInput + (dL / dMean * dMean / dInput) +
  // #                  (dL / dVar * dVar / dInput)

  // # gradInput
  // total_ops["backward"]["*"] = in_c * (in_h*in_w*batch_size) # scale
  // # Bootstrap from previous
  // #total_ops["backward"]["sqrt"] = in_c # Convert to std_dev
  // #total_ops["backward"]["/"] = in_c # Calculate inverse sqrt first
  toReturn["ops:*"] = input_volume; // scale

  // # dL / dGradVar
  // total_ops["backward"]["pow"] = in_c
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c
  // #total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
  //   (in_h*in_w*batch_size) # Subtract mean, bootstrap from previous calculation
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c *
  //   (in_h*in_w*batch_size)
  toReturn["ops:pow"] = input_channels;
  toReturn["ops:*"] += input_channels;
  toReturn["ops:*"] += input_volume;

  // # dL / dGradMean
  // #total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
  //   (in_h*in_w*batch_size) # bootstrap from previous
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # scale gradMean
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # eltwise
  //   with dL / dGradVar
  // total_ops["backward"]["+"] = in_c * (in_h*in_w*batch_size) # sum gradXhat
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # scale gradXhat
  toReturn["ops:*"] += input_channels; // scale gradMean
  toReturn["ops:*"] += input_channels; // eltwise with dL / dGradVar
  toReturn["ops:+"] = input_volume;    // sum gradXhat
  toReturn["ops:*"] += input_channels; // scale gradXhat

  // # totalGradInput
  // total_ops["backward"]["+"] = total_ops["backward"]["+"] + in_c *
  //   (in_h*in_w*batch_size) # Subtract mean, can't bootstrap this one
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # scale dL / dMean
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c # scale dL / dVar
  // total_ops["backward"]["*"] = total_ops["backward"]["*"] + in_c *
  //   (in_h*in_w*batch_size) # Eltwise multiply by dL / dVar
  // total_ops["backward"]["+"] = total_ops["backward"]["+"] + 2 * in_c *
  //   (in_h*in_w*batch_size) # Accumulate gradient terms
  toReturn["ops:+"] += input_volume;   // Subtract mean, can't bootstrap this one
  toReturn["ops:*"] += input_channels; // scale dL / dMean
  toReturn["ops:*"] += input_channels; // scale dL / dVar
  toReturn["ops:*"] += input_volume;   // Eltwise multiply by dL / dVar
  toReturn["ops:+"] += 2 * input_volume; // Accumulate gradient terms

  uint64_t reads = 0;
  for (int i = 0; i < 7; i++) {
    auto v = getTensorVolume(getOperand(i).getType());
    toReturn["operand:" + std::to_string(i) + ":activation_in"] = v;
    reads += v;
  }

  uint64_t writes = 0;
  for (int i = 0; i < 3; i++) {
    auto v = getTensorVolume(getResult(i).getType());
    toReturn["result:" + std::to_string(i) + ":grad"] = v;
    writes += v;
  }

  toReturn["reads"] = reads;
  toReturn["writes"] = writes;

  return toReturn;
}
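// Note on the operand/result loops above: in ATen, native_batch_norm_backward
// conventionally takes seven leading tensor operands (grad_out, input, weight,
// running_mean, running_var, save_mean, save_invstd) and returns three
// gradients (grad_input, grad_weight, grad_bias), which is what the bounds of
// 7 and 3 assume.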
// std::map<std::string, uint64_t> ReLUUnderOp::getStatistics() {
//   return getReLUOpStatistics(*this);
// }

std::map<std::string, uint64_t> ReluUnderOp::getStatistics() {
  return getReLUOpStatistics(*this);
}

// sub
std::map<std::string, uint64_t> SubOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand(0).getType().cast<TensorType>();
  Type bType = getOperand(1).getType();

  uint64_t ofm_volume = getTensorVolume(resultTy);

  toReturn["ops:-"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A and B operands
  uint64_t a_volume = getTensorVolume(aType);
  uint64_t b_volume = getTensorVolume(bType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["operand:1:activation_in"] = b_volume;

  toReturn["reads"] = a_volume + b_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// sub_
std::map<std::string, uint64_t> SubUnderOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType resultTy = getResult().getType().cast<TensorType>();
  TensorType aType = getOperand(0).getType().cast<TensorType>();
  Type bType = getOperand(1).getType();

  uint64_t ofm_volume = getTensorVolume(resultTy);

  toReturn["ops:-"] = ofm_volume;
  toReturn["result:0:activation_out"] = ofm_volume;

  // Find the size of the A and B operands
  uint64_t a_volume = getTensorVolume(aType);
  uint64_t b_volume = getTensorVolume(bType);

  toReturn["operand:0:activation_in"] = a_volume;
  toReturn["operand:1:activation_in"] = b_volume;

  toReturn["reads"] = a_volume + b_volume;
  toReturn["writes"] = ofm_volume;

  return toReturn;
}

// sum
std::map<std::string, uint64_t> SumOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  TensorType ty = getOperand(0).getType().cast<TensorType>();
  uint64_t volume = getTensorVolume(ty);

  toReturn["ops:+"] = volume;
  toReturn["operand:0:activation_in"] = volume;
  toReturn["result:0:activation_out"] = volume;
  toReturn["reads"] = volume;
  toReturn["writes"] = volume;

  return toReturn;
}

// size op can be zero overhead
std::map<std::string, uint64_t> SizeOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// squeeze can be zero overhead
std::map<std::string, uint64_t> SqueezeOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// transpose can be zero overhead
std::map<std::string, uint64_t> TOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// threshold_backward
std::map<std::string, uint64_t> ThresholdBackwardOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;

  uint64_t loss_in_volume =
      getTensorVolume(getOperand(0).getType().cast<TensorType>());
  uint64_t act_in_volume =
      getTensorVolume(getOperand(1).getType().cast<TensorType>());
  uint64_t loss_out_volume =
      getTensorVolume(getResult().getType().cast<TensorType>());

  toReturn["reads"] = toReturn["operand:0:activation_in"] =
      loss_in_volume + act_in_volume;
  toReturn["writes"] = toReturn["result:0:grad:dx"] = loss_out_volume;

  return toReturn;
}
// unsqueeze can be zero overhead
std::map<std::string, uint64_t> UnsqueezeOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

// view can be zero overhead
std::map<std::string, uint64_t> ViewOp::getStatistics() {
  std::map<std::string, uint64_t> toReturn;
  toReturn["reads"] = toReturn["operand:0:activation_in"] = 0;
  toReturn["writes"] = toReturn["result:0:activation_out"] = 0;
  return toReturn;
}

} // namespace aten
} // namespace NPCOMP
} // namespace mlir