This is possible now that the gallium HUD properly handles floating point values. It also helps AMD_performance_monitor return correct values for non-integer types.
Reviewed-by: Samuel Pitoiset <[email protected]> Signed-off-by: Boyan Ding <[email protected]> --- .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 70 +++++++++++++--------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index 089af61820..6d4deaf2ba 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -498,53 +498,59 @@ nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) static uint64_t sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { + union pipe_query_result result; + + result.u64 = 0; switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */ if (res64[1]) - return ((res64[0] / (double)res64[1]) / 48) * 100; + result.f = ((res64[0] / (double)res64[1]) / 48) * 100; break; case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: /* (branch / (branch + divergent_branch)) * 100 */ if (res64[0] + res64[1]) - return (res64[0] / (double)(res64[0] + res64[1])) * 100; + result.f = (res64[0] / (double)(res64[0] + res64[1])) * 100; break; case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: /* inst_executed / warps_launched */ if (res64[1]) - return res64[0] / (double)res64[1]; + result.u64 = res64[0] / (double)res64[1]; break; case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: /* (inst_issued - inst_executed) / inst_executed */ if (res64[1]) - return (res64[0] - res64[1]) / (double)res64[1]; + result.u64 = (res64[0] - res64[1]) / (double)res64[1]; break; case NVC0_HW_METRIC_QUERY_ISSUED_IPC: /* inst_issued / active_cycles */ if (res64[1]) - return res64[0] / (double)res64[1]; + result.u64 = res64[0] / (double)res64[1]; break; case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: /* ((inst_issued / 2) / active_cycles) * 100 */ if 
(res64[1]) - return ((res64[0] / 2) / (double)res64[1]) * 100; + result.f = ((res64[0] / 2) / (double)res64[1]) * 100; break; case NVC0_HW_METRIC_QUERY_IPC: /* inst_executed / active_cycles */ if (res64[1]) - return res64[0] / (double)res64[1]; + result.u64 = res64[0] / (double)res64[1]; break; default: debug_printf("invalid metric type: %d\n", hq->base.type - NVC0_HW_METRIC_QUERY(0)); break; } - return 0; + return result.u64; } static uint64_t sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { + union pipe_query_result result; + + result.u64 = 0; switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: return sm20_hw_metric_calc_result(hq, res64); @@ -552,31 +558,31 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) return sm20_hw_metric_calc_result(hq, res64); case NVC0_HW_METRIC_QUERY_INST_ISSUED: /* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */ - return res64[0] + res64[1] + (res64[2] + res64[3]) * 2; + result.u64 = res64[0] + res64[1] + (res64[2] + res64[3]) * 2; break; case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: return sm20_hw_metric_calc_result(hq, res64); case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: /* (metric-inst_issued - inst_executed) / inst_executed */ if (res64[4]) - return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) - - res64[4]) / (double)res64[4]); + result.u64 = (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) - + res64[4]) / (double)res64[4]); break; case NVC0_HW_METRIC_QUERY_ISSUED_IPC: /* metric-inst_issued / active_cycles */ if (res64[4]) - return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) / - (double)res64[4]; + result.u64 = (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) / + (double)res64[4]; break; case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS: /* issued1_0 + issued1_1 + issued2_0 + issued2_1 */ - return res64[0] + res64[1] + res64[2] + res64[3]; + result.u64 = res64[0] + res64[1] + res64[2] + res64[3]; break; case 
NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: /* ((metric-issue_slots / 2) / active_cycles) * 100 */ if (res64[4]) - return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) / - (double)res64[4]) * 100; + result.f = (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) / + (double)res64[4]) * 100; break; case NVC0_HW_METRIC_QUERY_IPC: return sm20_hw_metric_calc_result(hq, res64); @@ -585,78 +591,86 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) hq->base.type - NVC0_HW_METRIC_QUERY(0)); break; } - return 0; + return result.u64; } static uint64_t sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { + union pipe_query_result result; + + result.u64 = 0; switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: /* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */ if (res64[1]) - return ((res64[0] / (double)res64[1]) / 64) * 100; + result.f = ((res64[0] / (double)res64[1]) / 64) * 100; break; case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY: return sm20_hw_metric_calc_result(hq, res64); case NVC0_HW_METRIC_QUERY_INST_ISSUED: /* inst_issued1 + inst_issued2 * 2 */ - return res64[0] + res64[1] * 2; + result.u64 = res64[0] + res64[1] * 2; + break; case NVC0_HW_METRIC_QUERY_INST_PER_WRAP: return sm20_hw_metric_calc_result(hq, res64); case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD: /* (metric-inst_issued - inst_executed) / inst_executed */ if (res64[2]) - return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]); + result.u64 = (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]); break; case NVC0_HW_METRIC_QUERY_ISSUED_IPC: /* metric-inst_issued / active_cycles */ if (res64[2]) - return (res64[0] + res64[1] * 2) / (double)res64[2]; + result.u64 = (res64[0] + res64[1] * 2) / (double)res64[2]; break; case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS: /* inst_issued1 + inst_issued2 */ - return res64[0] + res64[1]; + result.u64 = res64[0] + res64[1]; + break; case 
NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION: /* ((metric-issue_slots / 2) / active_cycles) * 100 */ if (res64[2]) - return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100; + result.f = (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100; break; case NVC0_HW_METRIC_QUERY_IPC: return sm20_hw_metric_calc_result(hq, res64); case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD: /* (shared_load_replay + shared_store_replay) / inst_executed */ if (res64[2]) - return (res64[0] + res64[1]) / (double)res64[2]; + result.u64 = (res64[0] + res64[1]) / (double)res64[2]; break; case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY: /* thread_inst_executed / (inst_executed * max. number of threads per * wrap) * 100 */ if (res64[0]) - return (res64[1] / ((double)res64[0] * 32)) * 100; + result.f = (res64[1] / ((double)res64[0] * 32)) * 100; break; default: debug_printf("invalid metric type: %d\n", hq->base.type - NVC0_HW_METRIC_QUERY(0)); break; } - return 0; + return result.u64; } static uint64_t sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { + union pipe_query_result result; + + result.u64 = 0; switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY: /* not_predicated_off_thread_inst_executed / (inst_executed * max. number * of threads per wrap) * 100 */ if (res64[0]) - return (res64[1] / ((double)res64[0] * 32)) * 100; + result.f = (res64[1] / ((double)res64[0] * 32)) * 100; break; default: return sm30_hw_metric_calc_result(hq, res64); } - return 0; + return result.u64; } static boolean -- 2.13.1 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
