On Tue, Mar 26, 2013 at 10:06:39PM -0700, Kenneth Graunke wrote:
> While the Sandybridge PRM doesn't have any documentation on the GPU's
> performance counters, a lot of information can be gleaned from the older
> Ironlake PRM.  Oddly, none of the information documented there actually
> appears to apply to Ironlake.  However, it apparently works just great
> on Sandybridge.
> 
> Since this information has all been publicly available on the internet
> for around three years, we can use it.
> 
> Signed-off-by: Kenneth Graunke <[email protected]>

Merged, thanks for the patches.
-Daniel

> ---
>  tools/intel_perf_counters.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 146 insertions(+)
> 
> diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c
> index fd268b1..b528361 100644
> --- a/tools/intel_perf_counters.c
> +++ b/tools/intel_perf_counters.c
> @@ -22,9 +22,21 @@
>   *
>   * Authors:
>   *    Eric Anholt <[email protected]>
> + *    Kenneth Graunke <[email protected]>
> + *
> + * While documentation for performance counters is suspiciously missing from the
> + * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM.
> + *
> + * A lot of the Ironlake PRM actually unintentionally documents Sandybridge
> + * due to mistakes made when updating the documentation for Gen6+.  Many of
> + * these mislabeled sections carried forward to the public documentation.
> + *
> + * The Ironlake PRMs have been publicly available since 2010 and are online at:
> + * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family
>   */
>  
>  #include <unistd.h>
> +#include <stdbool.h>
>  #include <stdlib.h>
>  #include <stdio.h>
>  #include <err.h>
> @@ -71,6 +83,60 @@ const char *gen5_counter_names[GEN5_COUNTER_COUNT] = {
>       "cycles any EU is stalled for math",
>  };
>  
> +#define GEN6_COUNTER_COUNT 29
> +
> +/**
> + * Sandybridge: Counter Select = 001
> + * A0   A1   A2   A3   A4   TIMESTAMP RPT_ID
> + * A5   A6   A7   A8   A9   A10  A11  A12
> + * A13  A14  A15  A16  A17  A18  A19  A20
> + * A21  A22  A23  A24  A25  A26  A27  A28
> + */
> +const int gen6_counter_format = 1;
> +
> +/**
> + * Names for aggregating counters A0-A28.
> + *
> + * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28),
> + * it only lists the names for 28 of them; one is missing.  However, careful
> + * examination reveals a pattern: there are five GS counters (Active, Stall,
> + * Core Stall, # threads loaded, and ready but not running time).  There are
> + * also five PS counters, in the same order.  But there are only four VS
> + * counters listed - the number of VS threads loaded is missing.  Presumably,
> + * it exists and is counter 5, and the rest are shifted over one place.
> + */
> +const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
> +     [0]  = "Aggregated Core Array Active",
> +     [1]  = "Aggregated Core Array Stalled",
> +     [2]  = "Vertex Shader Active Time",
> +     [3]  = "Vertex Shader Stall Time",
> +     [4]  = "Vertex Shader Stall Time - Core Stall",
> +     [5]  = "# VS threads loaded",
> +     [6]  = "Vertex Shader Ready but not running time",
> +     [7]  = "Geometry Shader Active Time",
> +     [8]  = "Geometry Shader Stall Time",
> +     [9]  = "Geometry Shader Stall Time - Core Stall",
> +     [10] = "# GS threads loaded",
> +     [11] = "Geometry Shader ready but not running Time",
> +     [12] = "Pixel Shader Active Time",
> +     [13] = "Pixel Shader Stall Time",
> +     [14] = "Pixel Shader Stall Time - Core Stall",
> +     [15] = "# PS threads loaded",
> +     [16] = "Pixel Shader ready but not running Time",
> +     [17] = "Early Z Test Pixels Passing",
> +     [18] = "Early Z Test Pixels Failing",
> +     [19] = "Early Stencil Test Pixels Passing",
> +     [20] = "Early Stencil Test Pixels Failing",
> +     [21] = "Pixel Kill Count",
> +     [22] = "Alpha Test Pixels Failed",
> +     [23] = "Post PS Stencil Pixels Failed",
> +     [24] = "Post PS Z buffer Pixels Failed",
> +     [25] = "Pixels/samples Written in the frame buffer",
> +     [26] = "GPU Busy",
> +     [27] = "CL active and not stalled",
> +     [28] = "SF active and stalled",
> +};
> +
>  int have_totals = 0;
>  uint32_t *totals;
>  uint32_t *last_counter;
> @@ -85,6 +151,20 @@ struct intel_batchbuffer *batch;
>  #define MI_COUNTER_ADDRESS_GTT       (1 << 0)
>  /* DW2: report ID */
>  
> +/**
> + * According to the Sandybridge PRM, Volume 1, Part 1, page 48,
> + * MI_REPORT_PERF_COUNT is now opcode 0x28.  The Ironlake PRM, Volume 1,
> + * Part 3 details how it works.
> + */
> +/* DW0 */
> +#define GEN6_MI_REPORT_PERF_COUNT (0x28 << 23)
> +/* DW1 and 2 are the same as above */
> +
> +/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM */
> +#define OACONTROL                       0x2360
> +# define OACONTROL_COUNTER_SELECT_SHIFT 2
> +# define PERFORMANCE_COUNTER_ENABLE     (1 << 0)
> +
>  static void
>  gen5_get_counters(void)
>  {
> @@ -124,6 +204,45 @@ gen5_get_counters(void)
>       drm_intel_bo_unreference(stats_bo);
>  }
>  
> +static void
> +gen6_get_counters(void)
> +{
> +     int i;
> +     drm_intel_bo *stats_bo;
> +     uint32_t *stats_result;
> +
> +     /* Map from counter names to their index in the buffer object */
> +     static const int buffer_index[GEN6_COUNTER_COUNT] =
> +     {
> +             7,   6,  5,  4,  3,
> +             15, 14, 13, 12, 11, 10,  9,  8,
> +             23, 22, 21, 20, 19, 18, 17, 16,
> +             31, 30, 29, 28, 27, 26, 25, 24,
> +     };
> +
> +     stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
> +
> +     BEGIN_BATCH(3);
> +     OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
> +     OUT_RELOC(stats_bo,
> +               I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
> +               MI_COUNTER_ADDRESS_GTT);
> +     OUT_BATCH(0);
> +     ADVANCE_BATCH();
> +
> +     intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
> +
> +     drm_intel_bo_map(stats_bo, 0);
> +     stats_result = stats_bo->virtual;
> +     for (i = 0; i < GEN6_COUNTER_COUNT; i++) {
> +             totals[i] += stats_result[buffer_index[i]] - last_counter[i];
> +             last_counter[i] = stats_result[buffer_index[i]];
> +     }
> +
> +     drm_intel_bo_unmap(stats_bo);
> +     drm_intel_bo_unreference(stats_bo);
> +}
> +
>  #define STATS_CHECK_FREQUENCY        100
>  #define STATS_REPORT_FREQUENCY       2
>  
> @@ -131,6 +250,7 @@ int
>  main(int argc, char **argv)
>  {
>       uint32_t devid;
> +     int counter_format;
>       int counter_count;
>       const char **counter_name;
>       void (*get_counters)(void);
> @@ -138,6 +258,7 @@ main(int argc, char **argv)
>       char clear_screen[] = {0x1b, '[', 'H',
>                              0x1b, '[', 'J',
>                              0x0};
> +     bool oacontrol = true;
>       int fd;
>       int l;
>  
> @@ -152,10 +273,27 @@ main(int argc, char **argv)
>               counter_name = gen5_counter_names;
>               counter_count = GEN5_COUNTER_COUNT;
>               get_counters = gen5_get_counters;
> +             oacontrol = false;
> +     } else if (IS_GEN6(devid)) {
> +             counter_name = gen6_counter_names;
> +             counter_count = GEN6_COUNTER_COUNT;
> +             counter_format = gen6_counter_format;
> +             get_counters = gen6_get_counters;
>       } else {
>               printf("This tool is not yet supported on your platform.\n");
>               abort();
>       }
> +
> +     if (oacontrol) {
> +             /* Forcewake */
> +             intel_register_access_init(intel_get_pci_device(), 0);
> +
> +             /* Enable performance counters */
> +             intel_register_write(OACONTROL,
> +                     counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
> +                     PERFORMANCE_COUNTER_ENABLE);
> +     }
> +
>       totals = calloc(counter_count, sizeof(uint32_t));
>       last_counter = calloc(counter_count, sizeof(uint32_t));
>  
> @@ -180,6 +318,14 @@ main(int argc, char **argv)
>               }
>       }
>  
> +     if (oacontrol) {
> +             /* Disable performance counters */
> +             intel_register_write(OACONTROL, 0);
> +
> +             /* Forcewake */
> +             intel_register_access_fini();
> +     }
> +
>       free(totals);
>       free(last_counter);
>  
> -- 
> 1.8.2
> 
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to