TIM-VX/unified-tina/inc/HAL/shared/gc_hal_profiler_shared.h

347 lines
14 KiB
C
Raw Normal View History

/****************************************************************************
*
* Copyright (c) 2005 - 2023 by Vivante Corp. All rights reserved.
*
* The material in this file is confidential and contains trade secrets
* of Vivante Corporation. This is proprietary information owned by
* Vivante Corporation. No part of this work may be disclosed,
* reproduced, copied, transmitted, or used in any way for any purpose,
* without the express written permission of Vivante Corporation.
*
*****************************************************************************/
#ifndef __gc_hal_profiler_shared_h_
#define __gc_hal_profiler_shared_h_
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Build-time switches for the profiler. They are not referenced by the
 * declarations visible below in this header; presumably they are tested with
 * #if by the sources that include it (TODO confirm).
 */
#define ANDROID_PROFILER_COUNTERS   1
#define FPGA_INFO                   0
#define RECORD_COUNTER_ADDRESS      0
/*
 * HW profile information, part 1.
 *
 * One gctUINT32 slot per counter: two whole-GPU totals first, then the
 * counters grouped by the module prefix of the field name (FE, PE, SH, PA,
 * SE, RA, TX).
 *
 * NOTE(review): the file name suggests this layout is shared between
 * components, so treat field order and count as fixed - confirm before
 * reordering or inserting fields.
 */
typedef struct _gcsPROFILER_COUNTERS_PART1 {
    /* Whole-GPU totals */
    gctUINT32 gpuTotalRead64BytesPerFrame;
    gctUINT32 gpuTotalWrite64BytesPerFrame;

    /* FE */
    gctUINT32 fe_draw_count;
    gctUINT32 fe_out_vertex_count;
    gctUINT32 fe_cache_miss_count;
    gctUINT32 fe_cache_lk_count;
    gctUINT32 fe_stall_count;
    gctUINT32 fe_starve_count;
    gctUINT32 fe_process_count;

    /* PE (one set of four counters for pe0, one for pe1) */
    gctUINT32 pe0_pixel_count_killed_by_color_pipe;
    gctUINT32 pe0_pixel_count_killed_by_depth_pipe;
    gctUINT32 pe0_pixel_count_drawn_by_color_pipe;
    gctUINT32 pe0_pixel_count_drawn_by_depth_pipe;
    gctUINT32 pe1_pixel_count_killed_by_color_pipe;
    gctUINT32 pe1_pixel_count_killed_by_depth_pipe;
    gctUINT32 pe1_pixel_count_drawn_by_color_pipe;
    gctUINT32 pe1_pixel_count_drawn_by_depth_pipe;

    /* SH */
    gctUINT32 shader_cycle_count;
    gctUINT32 vs_shader_cycle_count;
    gctUINT32 ps_shader_cycle_count;
    gctUINT32 ps_inst_counter;
    gctUINT32 ps_rendered_pixel_counter;
    gctUINT32 vs_inst_counter;
    gctUINT32 vs_rendered_vertice_counter;
    gctUINT32 vs_branch_inst_counter;
    gctUINT32 vs_texld_inst_counter;
    gctUINT32 ps_branch_inst_counter;
    gctUINT32 ps_texld_inst_counter;
    gctUINT32 vs_non_idle_starve_count;
    gctUINT32 vs_starve_count;
    gctUINT32 vs_stall_count;
    gctUINT32 vs_process_count;
    gctUINT32 ps_non_idle_starve_count;
    gctUINT32 ps_starve_count;
    gctUINT32 ps_stall_count;
    gctUINT32 ps_process_count;

    /* PA */
    gctUINT32 pa_input_vtx_counter;
    gctUINT32 pa_input_prim_counter;
    gctUINT32 pa_output_prim_counter;
    gctUINT32 pa_depth_clipped_counter;
    gctUINT32 pa_trivial_rejected_counter;
    gctUINT32 pa_culled_prim_counter;
    gctUINT32 pa_droped_prim_counter;
    gctUINT32 pa_frustum_clipped_prim_counter;
    gctUINT32 pa_frustum_clipdroped_prim_counter;
    gctUINT32 pa_non_idle_starve_count;
    gctUINT32 pa_starve_count;
    gctUINT32 pa_stall_count;
    gctUINT32 pa_process_count;

    /* SE */
    gctUINT32 se_culled_triangle_count;
    gctUINT32 se_culled_lines_count;
    gctUINT32 se_clipped_triangle_count;
    gctUINT32 se_clipped_line_count;
    gctUINT32 se_starve_count;
    gctUINT32 se_stall_count;
    gctUINT32 se_receive_triangle_count;
    gctUINT32 se_send_triangle_count;
    gctUINT32 se_receive_lines_count;
    gctUINT32 se_send_lines_count;
    gctUINT32 se_process_count;
    gctUINT32 se_trivial_rejected_line_count;
    gctUINT32 se_non_idle_starve_count;

    /* RA */
    gctUINT32 ra_input_prim_count;
    gctUINT32 ra_total_quad_count;
    gctUINT32 ra_valid_quad_count_after_early_z;
    gctUINT32 ra_valid_pixel_count_to_render;
    gctUINT32 ra_output_valid_quad_count;
    gctUINT32 ra_output_valid_pixel_count;
    gctUINT32 ra_pipe_cache_miss_counter;
    gctUINT32 ra_pipe_hz_cache_miss_counter;
    gctUINT32 ra_prefetch_cache_miss_counter;
    gctUINT32 ra_prefetch_hz_cache_miss_counter;
    gctUINT32 ra_eez_culled_counter;
    gctUINT32 ra_non_idle_starve_count;
    gctUINT32 ra_starve_count;
    gctUINT32 ra_stall_count;
    gctUINT32 ra_process_count;

    /* TX */
    gctUINT32 tx_total_bilinear_requests;
    gctUINT32 tx_total_trilinear_requests;
    gctUINT32 tx_total_discarded_texture_requests;
    gctUINT32 tx_total_texture_requests;
    gctUINT32 tx_mc0_miss_count;
    gctUINT32 tx_mc0_request_byte_count;
    gctUINT32 tx_mc1_miss_count;
    gctUINT32 tx_mc1_request_byte_count;
    gctUINT32 tx_non_idle_starve_count;
    gctUINT32 tx_starve_count;
    gctUINT32 tx_stall_count;
    gctUINT32 tx_process_count;
} gcsPROFILER_COUNTERS_PART1;
/*
 * HW profile information, part 2.
 *
 * One gctUINT32 slot per counter, grouped by the module prefix of the field
 * name: MCC (followed by the mc_* bandwidth fields), MCZ, HI and L2.
 *
 * NOTE(review): the file name suggests this layout is shared between
 * components, so treat field order and count as fixed - confirm before
 * reordering or inserting fields.
 */
typedef struct _gcsPROFILER_COUNTERS_PART2 {
    /* MCC */
    gctUINT32 mcc_total_read_req_8B_from_colorpipe;
    gctUINT32 mcc_total_read_req_8B_sentout_from_colorpipe;
    gctUINT32 mcc_total_write_req_8B_from_colorpipe;
    gctUINT32 mcc_total_read_req_sentout_from_colorpipe;
    gctUINT32 mcc_total_write_req_from_colorpipe;
    gctUINT32 mcc_total_read_req_8B_from_depthpipe;
    gctUINT32 mcc_total_read_req_8B_sentout_from_depthpipe;
    gctUINT32 mcc_total_write_req_8B_from_depthpipe;
    gctUINT32 mcc_total_read_req_sentout_from_depthpipe;
    gctUINT32 mcc_total_write_req_from_depthpipe;
    gctUINT32 mcc_total_read_req_8B_from_others;
    gctUINT32 mcc_total_write_req_8B_from_others;
    gctUINT32 mcc_total_read_req_from_others;
    gctUINT32 mcc_total_write_req_from_others;
    gctUINT32 mcc_axi_total_latency;
    gctUINT32 mcc_axi_sample_count;
    gctUINT32 mcc_axi_max_latency;
    gctUINT32 mcc_axi_min_latency;
    /* Per-client read bandwidth, then per-client write bandwidth. */
    gctUINT32 mc_fe_read_bandwidth;
    gctUINT32 mc_mmu_read_bandwidth;
    gctUINT32 mc_blt_read_bandwidth;
    gctUINT32 mc_sh0_read_bandwidth;
    gctUINT32 mc_sh1_read_bandwidth;
    gctUINT32 mc_pe_write_bandwidth;
    gctUINT32 mc_blt_write_bandwidth;
    gctUINT32 mc_sh0_write_bandwidth;
    gctUINT32 mc_sh1_write_bandwidth;

    /* MCZ (same field set as the mcc_* request/latency counters) */
    gctUINT32 mcz_total_read_req_8B_from_colorpipe;
    gctUINT32 mcz_total_read_req_8B_sentout_from_colorpipe;
    gctUINT32 mcz_total_write_req_8B_from_colorpipe;
    gctUINT32 mcz_total_read_req_sentout_from_colorpipe;
    gctUINT32 mcz_total_write_req_from_colorpipe;
    gctUINT32 mcz_total_read_req_8B_from_depthpipe;
    gctUINT32 mcz_total_read_req_8B_sentout_from_depthpipe;
    gctUINT32 mcz_total_write_req_8B_from_depthpipe;
    gctUINT32 mcz_total_read_req_sentout_from_depthpipe;
    gctUINT32 mcz_total_write_req_from_depthpipe;
    gctUINT32 mcz_total_read_req_8B_from_others;
    gctUINT32 mcz_total_write_req_8B_from_others;
    gctUINT32 mcz_total_read_req_from_others;
    gctUINT32 mcz_total_write_req_from_others;
    gctUINT32 mcz_axi_total_latency;
    gctUINT32 mcz_axi_sample_count;
    gctUINT32 mcz_axi_max_latency;
    gctUINT32 mcz_axi_min_latency;

    /* HI (hi0_* set, hi1_* set, then the unnumbered hi_* totals) */
    gctUINT32 hi0_total_read_8B_count;
    gctUINT32 hi0_total_write_8B_count;
    gctUINT32 hi0_total_read_request_count;
    gctUINT32 hi0_total_write_request_count;
    gctUINT32 hi0_axi_cycles_read_request_stalled;
    gctUINT32 hi0_axi_cycles_write_request_stalled;
    gctUINT32 hi0_axi_cycles_write_data_stalled;
    gctUINT32 hi1_total_read_8B_count;
    gctUINT32 hi1_total_write_8B_count;
    gctUINT32 hi1_total_read_request_count;
    gctUINT32 hi1_total_write_request_count;
    gctUINT32 hi1_axi_cycles_read_request_stalled;
    gctUINT32 hi1_axi_cycles_write_request_stalled;
    gctUINT32 hi1_axi_cycles_write_data_stalled;
    gctUINT32 hi_total_cycle_count;
    gctUINT32 hi_total_idle_cycle_count;
    gctUINT32 hi_total_read_8B_count;
    gctUINT32 hi_total_write_8B_count;
    gctUINT32 hi_total_readOCB_16B_count;
    gctUINT32 hi_total_writeOCB_16B_count;

    /* L2 (request/transaction totals, then axi0 and axi1 latency sets) */
    gctUINT32 l2_total_axi0_read_request_count;
    gctUINT32 l2_total_axi1_read_request_count;
    gctUINT32 l2_total_axi0_write_request_count;
    gctUINT32 l2_total_axi1_write_request_count;
    gctUINT32 l2_total_read_transactions_request_by_axi0;
    gctUINT32 l2_total_read_transactions_request_by_axi1;
    gctUINT32 l2_total_write_transactions_request_by_axi0;
    gctUINT32 l2_total_write_transactions_request_by_axi1;
    gctUINT32 l2_axi0_minmax_latency;
    gctUINT32 l2_axi0_min_latency;
    gctUINT32 l2_axi0_max_latency;
    gctUINT32 l2_axi0_total_latency;
    gctUINT32 l2_axi0_total_request_count;
    gctUINT32 l2_axi1_minmax_latency;
    gctUINT32 l2_axi1_min_latency;
    gctUINT32 l2_axi1_max_latency;
    gctUINT32 l2_axi1_total_latency;
    gctUINT32 l2_axi1_total_request_count;
} gcsPROFILER_COUNTERS_PART2;
/*
 * Complete HW counter set: the part-1 counters immediately followed by the
 * part-2 counters, each embedded by value.
 */
typedef struct _gcsPROFILER_COUNTERS {
    gcsPROFILER_COUNTERS_PART1  counters_part1;
    gcsPROFILER_COUNTERS_PART2  counters_part2;
} gcsPROFILER_COUNTERS;
/*
 * Identifies a VIP probe counter module. Values are consecutive from zero,
 * so the trailing _COUNT enumerator equals the number of modules listed
 * before it.
 */
typedef enum _gceVIP_PROBE_COUNTER {
    gcvVIP_PROBE_COUNTER_NEURAL_NET = 0,
    gcvVIP_PROBE_COUNTER_TENSOR_PROCESSOR,

    /* Keep last. */
    gcvVIP_PROBE_COUNTER_COUNT
} gceVIP_PROBE_COUNTER;
/*
 * Mask definitions for the overflow indicator of TP.
 *
 * Every constant is a single-bit mask. The per-core idle-cycle masks at the
 * end restart at bit 0 and therefore share their values with the first four
 * constants; presumably they are tested against a separate overflow word
 * (TODO confirm against the code that reads them).
 *
 * The historical typedef name _gceTPCOUNTER_OVERFLOW (identical to the tag)
 * is kept so existing users keep compiling. gceTPCOUNTER_OVERFLOW is an
 * additional alias that follows the tag/typedef naming used by
 * gceVIP_PROBE_COUNTER in this header.
 */
typedef enum _gceTPCOUNTER_OVERFLOW {
    gcvTPCOUNTER_LAYER_ID_OVERFLOW               = (1 << 0),
    gcvTPCOUNTER_TOTAL_BUSY_CYCLE_OVERFLOW       = (1 << 1),
    gcvTPCOUNTER_TOTAL_READ_BW_DDR_OVERFLOW      = (1 << 2),
    gcvTPCOUNTER_TOTAL_WRITE_BW_DDR_OVERFLOW     = (1 << 3),
    gcvTPCOUNTER_TOTAL_READ_BW_SRAM_OVERFLOW     = (1 << 4),
    gcvTPCOUNTER_TOTAL_WRITE_BW_SRAM_OVERFLOW    = (1 << 5),
    gcvTPCOUNTER_TOTAL_READ_BW_OCB_OVERFLOW      = (1 << 6),
    gcvTPCOUNTER_TOTAL_WRITE_BW_OCB_OVERFLOW     = (1 << 7),
    gcvTPCOUNTER_FC_PIX_CNT_OVERFLOW             = (1 << 8),
    gcvTPCOUNTER_FC_ZERO_SKIP_OVERFLOW           = (1 << 9),
    gcvTPCOUNTER_FC_COEF_CNT_OVERFLOW            = (1 << 10),
    gcvTPCOUNTER_FC_COEF_ZERO_CNT_OVERFLOW       = (1 << 11),

    /* Per-core idle-cycle masks: bit numbering starts over at 0. */
    gcvTPCOUNTER_TOTAL_IDLE_CYCLE_CORE0_OVERFLOW = (1 << 0),
    gcvTPCOUNTER_TOTAL_IDLE_CYCLE_CORE1_OVERFLOW = (1 << 1),
    gcvTPCOUNTER_TOTAL_IDLE_CYCLE_CORE2_OVERFLOW = (1 << 2),
    gcvTPCOUNTER_TOTAL_IDLE_CYCLE_CORE3_OVERFLOW = (1 << 3),
} _gceTPCOUNTER_OVERFLOW, gceTPCOUNTER_OVERFLOW;
/*
 * Mask definitions for the overflow indicator of NN.
 *
 * Every constant is a single-bit mask. Bit 1 is not assigned to any constant
 * in this enum.
 *
 * The historical typedef name _gceNNCOUNTER_OVERFLOW (identical to the tag)
 * is kept so existing users keep compiling. gceNNCOUNTER_OVERFLOW is an
 * additional alias that follows the tag/typedef naming used by
 * gceVIP_PROBE_COUNTER in this header.
 */
typedef enum _gceNNCOUNTER_OVERFLOW {
    gcvNNCOUNTER_TOTAL_BUSY_CYCLE_OVERFLOW       = (1 << 0),
    gcvNNCOUNTER_TOTAL_READ_CYCLE_DDR_OVERFLOW   = (1 << 2),
    gcvNNCOUNTER_TOTAL_READ_BW_DDR_OVERFLOW      = (1 << 3),
    gcvNNCOUNTER_TOTAL_WRITE_CYCLE_DDR_OVERFLOW  = (1 << 4),
    gcvNNCOUNTER_TOTAL_WRITE_BW_DDR_OVERFLOW     = (1 << 5),
    /* Misspelled name ("SYCLE") kept for source compatibility. */
    gcvNNCOUNTER_TOTAL_READ_SYCLE_SRAM_OVERFLOW  = (1 << 6),
    /* Correctly spelled equivalent of the constant above; same value. */
    gcvNNCOUNTER_TOTAL_READ_CYCLE_SRAM_OVERFLOW  = (1 << 6),
    gcvNNCOUNTER_TOTAL_WRITE_CYCLE_SRAM_OVERFLOW = (1 << 7),
    gcvNNCOUNTER_TOTAL_MAC_CYCLE_OVERFLOW        = (1 << 8),
    gcvNNCOUNTER_TOTAL_MAC_COUNT_OVERFLOW        = (1 << 9),
    gcvNNCOUNTER_ZERO_COEF_SKIP_COUNT_OVERFLOW   = (1 << 10),
    gcvNNCOUNTER_NON_ZERO_COEF_COUNT_OVERFLOW    = (1 << 11),
} _gceNNCOUNTER_OVERFLOW, gceNNCOUNTER_OVERFLOW;
/* Multiplied by 4 to size nn_reserved_counter[] in the struct below. */
#define MODULE_NN_RESERVED_COUNTER_NUM 0x9

/*
 * VIP probe counters: the nn_* block followed by the tp_* block, every slot
 * a gctUINT32. Most counters are stored as a value immediately followed by
 * its *_overflow companion; the exceptions are called out inline.
 *
 * NOTE(review): the file name suggests this layout is shared between
 * components, so treat field order, count and array sizes as fixed - confirm
 * before changing them.
 */
typedef struct _gcsPROFILER_VIP_PROBE_COUNTERS {
    /* NN */
    gctUINT32 nn_layer_id;
    gctUINT32 nn_layer_id_overflow;
    /* No overflow companion follows this field. */
    gctUINT32 nn_instr_info;
    gctUINT32 nn_total_busy_cycle;
    gctUINT32 nn_total_busy_cycle_overflow;
    gctUINT32 nn_total_read_cycle_ddr;
    gctUINT32 nn_total_read_cycle_ddr_overflow;
    gctUINT32 nn_total_read_valid_bandwidth_ddr;
    gctUINT32 nn_total_read_valid_bandwidth_ddr_overflow;
    gctUINT32 nn_total_write_cycle_ddr;
    gctUINT32 nn_total_write_cycle_ddr_overflow;
    gctUINT32 nn_total_write_valid_bandwidth_ddr;
    gctUINT32 nn_total_write_valid_bandwidth_ddr_overflow;
    gctUINT32 nn_total_read_cycle_sram;
    gctUINT32 nn_total_read_cycle_sram_overflow;
    gctUINT32 nn_total_write_cycle_sram;
    gctUINT32 nn_total_write_cycle_sram_overflow;
    gctUINT32 nn_total_mac_cycle;
    gctUINT32 nn_total_mac_cycle_overflow;
    gctUINT32 nn_total_mac_count;
    gctUINT32 nn_total_mac_count_overflow;
    gctUINT32 nn_zero_coef_skip_count;
    gctUINT32 nn_zero_coef_skip_count_overflow;
    gctUINT32 nn_non_zero_coef_count;
    gctUINT32 nn_non_zero_coef_count_overflow;
    /* 4 * 0x9 = 36 entries. */
    gctUINT32 nn_reserved_counter[4 * MODULE_NN_RESERVED_COUNTER_NUM];
    /* Overflow array (4 entries) precedes the 32-entry value array. */
    gctUINT32 nn_total_idle_cycle_core_overflow[4];
    gctUINT32 nn_total_idle_cycle_core[32];

    /* TP */
    gctUINT32 tp_layer_id;
    gctUINT32 tp_layer_id_overflow;
    gctUINT32 tp_total_busy_cycle;
    gctUINT32 tp_total_busy_cycle_overflow;
    gctUINT32 tp_total_read_bandwidth_cache;
    gctUINT32 tp_total_read_bandwidth_cache_overflow;
    gctUINT32 tp_total_write_bandwidth_cache;
    gctUINT32 tp_total_write_bandwidth_cache_overflow;
    gctUINT32 tp_total_read_bandwidth_sram;
    gctUINT32 tp_total_read_bandwidth_sram_overflow;
    gctUINT32 tp_total_write_bandwidth_sram;
    gctUINT32 tp_total_write_bandwidth_sram_overflow;
    gctUINT32 tp_total_read_bandwidth_ocb;
    gctUINT32 tp_total_read_bandwidth_ocb_overflow;
    gctUINT32 tp_total_write_bandwidth_ocb;
    gctUINT32 tp_total_write_bandwidth_ocb_overflow;
    /* FC counters are laid out value, value, overflow, overflow. */
    gctUINT32 tp_fc_pix_count;
    gctUINT32 tp_fc_zero_skip_count;
    gctUINT32 tp_fc_pix_count_overflow;
    gctUINT32 tp_fc_zero_skip_count_overflow;
    gctUINT32 tp_fc_coef_count;
    gctUINT32 tp_fc_coef_zero_count;
    gctUINT32 tp_fc_coef_count_overflow;
    gctUINT32 tp_fc_coef_zero_count_overflow;
    /* Value array first here, then an overflow array of the same length. */
    gctUINT32 tp_total_idle_cycle_core[16];
    gctUINT32 tp_total_idle_cycle_core_overflows[16];
} gcsPROFILER_VIP_PROBE_COUNTERS;
#ifdef __cplusplus
}
#endif
#endif /* __gc_hal_profiler_shared_h_ */