Add ArchRast events for tracking early/late depth and stencil test results,
TE patch info, GS prim info, and FrontEnd/BackEnd DrawEnd events.
---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   | 279 ++++++++++++++++++++-
 .../drivers/swr/rasterizer/archrast/events.proto   | 255 +++++++++++++++++++
 .../drivers/swr/rasterizer/core/backend.cpp        |   7 +
 .../drivers/swr/rasterizer/core/frontend.cpp       |   3 +-
 .../drivers/swr/rasterizer/core/threads.cpp        |   1 +
 .../scripts/templates/ar_eventhandler_h.template   |   2 +-
 6 files changed, 541 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp 
b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 16b6d33..5bb1c7b 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -35,16 +35,289 @@
 namespace ArchRast
 {
     //////////////////////////////////////////////////////////////////////////
+    /// @brief Tallies of depth (Z) and stencil test outcomes for the early and late test stages; every counter starts at zero.
+    struct DepthStencilStats
+    {
+        uint32_t earlyZTestPassCount = 0;        // early Z: set bits of depthPassMask (or depthPassCount for pixel rate)
+        uint32_t earlyZTestFailCount = 0;        // early Z: lanes counted as failing
+        uint32_t lateZTestPassCount = 0;         // late Z: lanes counted as passing
+        uint32_t lateZTestFailCount = 0;         // late Z: lanes counted as failing
+        uint32_t earlyStencilTestPassCount = 0;  // early stencil: set bits of stencilPassMask
+        uint32_t earlyStencilTestFailCount = 0;  // early stencil: lanes counted as failing
+        uint32_t lateStencilTestPassCount = 0;   // late stencil: set bits of stencilPassMask
+        uint32_t lateStencilTestFailCount = 0;   // late stencil: lanes counted as failing
+        uint32_t earlyZTestCount = 0;            // early Z: lanes tested (pass + fail)
+        uint32_t lateZTestCount = 0;             // late Z: lanes tested (pass + fail)
+        uint32_t earlyStencilTestCount = 0;      // early stencil: lanes tested (pass + fail)
+        uint32_t lateStencilTestCount = 0;       // late stencil: lanes tested (pass + fail)
+    };
+
+    struct CStats  // clipper counters, fed by ClipVertexCount events
+    {
+        uint32_t clippedVerts = 0;  // running sum of popcount(primMask) * vertsPerPrim
+    };
+
+    /// @brief Tessellation counters, fed by TessPrimCount events.
+    struct TEStats
+    {
+        // Running sum of primCount over TessPrimCount events.
+        //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If that holds, it's fine.
+        uint32_t inputPrims = 0;
+    };
+
+    /// @brief Geometry-shader counters, summed from GSPrimInfo events.
+    ///        Members are zero-initialized like the sibling stats structs, so a
+    ///        default-constructed GSStats never holds indeterminate values.
+    struct GSStats
+    {
+        uint32_t inputPrimCount = 0;      // running sum of GSPrimInfo::inputPrimCount
+        uint32_t primGeneratedCount = 0;  // running sum of GSPrimInfo::primGeneratedCount
+        uint32_t vertsInput = 0;          // running sum of GSPrimInfo::vertsInput
+    };
+
+    //////////////////////////////////////////////////////////////////////////
     /// @brief Event handler that saves stat events to event files. This
     ///        handler filters out unwanted events.
     class EventHandlerStatsFile : public EventHandlerFile
     {
     public:
+        DepthStencilStats DSSingleSample = {};  // fed by the Early/LateDepthStencilInfoSingleSample handlers
+        DepthStencilStats DSSampleRate = {};    // fed by the Early/LateDepthStencilInfoSampleRate handlers
+        DepthStencilStats DSPixelRate = {};     // fed by the Early/LateDepthInfoPixelRate handlers (Z counters only)
+        DepthStencilStats DSNullPS = {};        // fed by the Early/LateDepthStencilInfoNullPS handlers
+        DepthStencilStats DSOmZ = {};           // combined tallies, updated by every depth/stencil handler
+        CStats CS = {};                         // clipper counters, flushed on FrontendDrawEndEvent
+        TEStats TS = {};                        // tessellation counters, flushed on FrontendDrawEndEvent
+        GSStats GS = {};                        // geometry-shader counters, flushed on FrontendDrawEndEvent
+
         EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
 
         // These are events that we're not interested in saving in stats event 
files.
         virtual void Handle(Start& event) {}
         virtual void Handle(End& event) {}
+
+        /// @brief Folds one early depth/stencil test result of the single-sample
+        ///        backend into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        // NOTE(review): this patch also changes the generated EventHandler::Handle to take
+        // T&&; confirm this T& overload is still the one the dispatcher reaches.
+        virtual void Handle(EarlyDepthStencilInfoSingleSample& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //earlyZ test compute
+            DSSingleSample.earlyZTestPassCount += zPass;
+            DSSingleSample.earlyZTestFailCount += zFail;
+            DSSingleSample.earlyZTestCount += (zPass + zFail);
+
+            //earlyStencil test compute
+            DSSingleSample.earlyStencilTestPassCount += stencilPass;
+            DSSingleSample.earlyStencilTestFailCount += stencilFail;
+            DSSingleSample.earlyStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.earlyZTestPassCount += zPass;
+            DSOmZ.earlyZTestFailCount += zFail;
+            DSOmZ.earlyZTestCount += (zPass + zFail);
+            DSOmZ.earlyStencilTestPassCount += stencilPass;
+            DSOmZ.earlyStencilTestFailCount += stencilFail;
+            DSOmZ.earlyStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one early depth/stencil test result of the sample-rate
+        ///        backend into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        virtual void Handle(EarlyDepthStencilInfoSampleRate& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //earlyZ test compute
+            DSSampleRate.earlyZTestPassCount += zPass;
+            DSSampleRate.earlyZTestFailCount += zFail;
+            DSSampleRate.earlyZTestCount += (zPass + zFail);
+
+            //earlyStencil test compute
+            DSSampleRate.earlyStencilTestPassCount += stencilPass;
+            DSSampleRate.earlyStencilTestFailCount += stencilFail;
+            DSSampleRate.earlyStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.earlyZTestPassCount += zPass;
+            DSOmZ.earlyZTestFailCount += zFail;
+            DSOmZ.earlyZTestCount += (zPass + zFail);
+            DSOmZ.earlyStencilTestPassCount += stencilPass;
+            DSOmZ.earlyStencilTestFailCount += stencilFail;
+            DSOmZ.earlyStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one early depth/stencil test result of the null-pixel-shader
+        ///        backend into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        virtual void Handle(EarlyDepthStencilInfoNullPS& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //earlyZ test compute
+            DSNullPS.earlyZTestPassCount += zPass;
+            DSNullPS.earlyZTestFailCount += zFail;
+            DSNullPS.earlyZTestCount += (zPass + zFail);
+
+            //earlyStencil test compute
+            DSNullPS.earlyStencilTestPassCount += stencilPass;
+            DSNullPS.earlyStencilTestFailCount += stencilFail;
+            DSNullPS.earlyStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.earlyZTestPassCount += zPass;
+            DSOmZ.earlyZTestFailCount += zFail;
+            DSOmZ.earlyZTestCount += (zPass + zFail);
+            DSOmZ.earlyStencilTestPassCount += stencilPass;
+            DSOmZ.earlyStencilTestFailCount += stencilFail;
+            DSOmZ.earlyStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one late depth/stencil test result of the single-sample
+        ///        backend into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        virtual void Handle(LateDepthStencilInfoSingleSample& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //lateZ test compute
+            DSSingleSample.lateZTestPassCount += zPass;
+            DSSingleSample.lateZTestFailCount += zFail;
+            DSSingleSample.lateZTestCount += (zPass + zFail);
+
+            //lateStencil test compute
+            DSSingleSample.lateStencilTestPassCount += stencilPass;
+            DSSingleSample.lateStencilTestFailCount += stencilFail;
+            DSSingleSample.lateStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.lateZTestPassCount += zPass;
+            DSOmZ.lateZTestFailCount += zFail;
+            DSOmZ.lateZTestCount += (zPass + zFail);
+            DSOmZ.lateStencilTestPassCount += stencilPass;
+            DSOmZ.lateStencilTestFailCount += stencilFail;
+            DSOmZ.lateStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one late depth/stencil test result of the sample-rate
+        ///        backend into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        virtual void Handle(LateDepthStencilInfoSampleRate& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //lateZ test compute
+            DSSampleRate.lateZTestPassCount += zPass;
+            DSSampleRate.lateZTestFailCount += zFail;
+            DSSampleRate.lateZTestCount += (zPass + zFail);
+
+            //lateStencil test compute
+            DSSampleRate.lateStencilTestPassCount += stencilPass;
+            DSSampleRate.lateStencilTestFailCount += stencilFail;
+            DSSampleRate.lateStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.lateZTestPassCount += zPass;
+            DSOmZ.lateZTestFailCount += zFail;
+            DSOmZ.lateZTestCount += (zPass + zFail);
+            DSOmZ.lateStencilTestPassCount += stencilPass;
+            DSOmZ.lateStencilTestFailCount += stencilFail;
+            DSOmZ.lateStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one LateDepthStencilInfoNullPS test result into the per-draw counters.
+        /// @param event carries the depthPassMask, stencilPassMask and coverageMask lane masks.
+        // NOTE(review): the BackendDrawEndEvent handler emits no summary event for the
+        // DSNullPS late counters; they only reach the output through DSOmZ.
+        virtual void Handle(LateDepthStencilInfoNullPS& event)
+        {
+            // A covered lane whose bit is clear in a pass mask failed that test. This needs the
+            // bitwise complement (~); logical-not (!) would collapse the whole mask to 0 or 1.
+            const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+            const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+            const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+            const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+
+            //lateZ test compute
+            DSNullPS.lateZTestPassCount += zPass;
+            DSNullPS.lateZTestFailCount += zFail;
+            DSNullPS.lateZTestCount += (zPass + zFail);
+
+            //lateStencil test compute
+            DSNullPS.lateStencilTestPassCount += stencilPass;
+            DSNullPS.lateStencilTestFailCount += stencilFail;
+            DSNullPS.lateStencilTestCount += (stencilPass + stencilFail);
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running per-mode totals would re-count every earlier event of the draw.
+            DSOmZ.lateZTestPassCount += zPass;
+            DSOmZ.lateZTestFailCount += zFail;
+            DSOmZ.lateZTestCount += (zPass + zFail);
+            DSOmZ.lateStencilTestPassCount += stencilPass;
+            DSOmZ.lateStencilTestFailCount += stencilFail;
+            DSOmZ.lateStencilTestCount += (stencilPass + stencilFail);
+        }
+
+        /// @brief Folds one pixel-rate early-Z result into the per-draw counters.
+        /// @param event carries depthPassCount and the activeLanes mask.
+        virtual void Handle(EarlyDepthInfoPixelRate& event)
+        {
+            // Tested = number of active lanes; whatever did not pass is counted as failed.
+            const uint32_t tested = _mm_popcnt_u32(event.data.activeLanes);
+            const uint32_t passed = static_cast<uint32_t>(event.data.depthPassCount);
+            const uint32_t failed = tested - passed;
+
+            //earlyZ test compute
+            DSPixelRate.earlyZTestCount += tested;
+            DSPixelRate.earlyZTestPassCount += passed;
+            DSPixelRate.earlyZTestFailCount += failed;
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running pixel-rate totals would re-count every earlier event of the draw.
+            DSOmZ.earlyZTestPassCount += passed;
+            DSOmZ.earlyZTestFailCount += failed;
+            DSOmZ.earlyZTestCount += tested;
+        }
+
+
+        /// @brief Folds one pixel-rate late-Z result into the per-draw counters.
+        /// @param event carries depthPassCount and the activeLanes mask.
+        virtual void Handle(LateDepthInfoPixelRate& event)
+        {
+            // Tested = number of active lanes; whatever did not pass is counted as failed.
+            const uint32_t tested = _mm_popcnt_u32(event.data.activeLanes);
+            const uint32_t passed = static_cast<uint32_t>(event.data.depthPassCount);
+            const uint32_t failed = tested - passed;
+
+            //lateZ test compute
+            DSPixelRate.lateZTestCount += tested;
+            DSPixelRate.lateZTestPassCount += passed;
+            DSPixelRate.lateZTestFailCount += failed;
+
+            //outputMerger test compute: add only this event's contribution; adding the
+            //running pixel-rate totals would re-count every earlier event of the draw.
+            DSOmZ.lateZTestPassCount += passed;
+            DSOmZ.lateZTestFailCount += failed;
+            DSOmZ.lateZTestCount += tested;
+        }
+
+
+        /// @brief Flushes the depth/stencil tallies gathered for a draw: forwards one summary
+        ///        event per counter group to EventHandlerFile, then zeroes the counters.
+        /// @param event supplies the drawId stamped on every summary event.
+        virtual void Handle(BackendDrawEndEvent& event)
+        {
+            const auto drawId = event.data.drawId;
+
+            // Single-sample summaries: Z first, then stencil.
+            EventHandlerFile::Handle(EarlyZSingleSample(drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
+            EventHandlerFile::Handle(LateZSingleSample(drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
+            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateStencilSingleSample(drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
+
+            // Sample-rate summaries.
+            EventHandlerFile::Handle(EarlyZSampleRate(drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
+            EventHandlerFile::Handle(LateZSampleRate(drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
+            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
+
+            // Pixel-rate summaries (Z only).
+            EventHandlerFile::Handle(EarlyZPixelRate(drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
+            EventHandlerFile::Handle(LateZPixelRate(drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
+
+            // Null-PS summaries (early counters only).
+            EventHandlerFile::Handle(EarlyZNullPS(drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
+
+            // Combined (OmZ) summaries.
+            EventHandlerFile::Handle(EarlyOmZ(drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
+            EventHandlerFile::Handle(EarlyOmStencil(drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
+            EventHandlerFile::Handle(LateOmZ(drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
+            EventHandlerFile::Handle(LateOmStencil(drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
+
+            // Start the next draw from zeroed counters.
+            DSOmZ = DepthStencilStats();
+            DSNullPS = DepthStencilStats();
+            DSPixelRate = DepthStencilStats();
+            DSSampleRate = DepthStencilStats();
+            DSSingleSample = DepthStencilStats();
+        }
+
+        /// @brief Flushes the clipper, tessellation and geometry-shader tallies gathered for a
+        ///        draw: forwards one summary event per counter to EventHandlerFile, then
+        ///        zeroes the counters.
+        /// @param event supplies the drawId stamped on every summary event.
+        virtual void Handle(FrontendDrawEndEvent& event)
+        {
+            const auto drawId = event.data.drawId;
+
+            // Clipper, then tessellator.
+            EventHandlerFile::Handle(VertsClipped(drawId, CS.clippedVerts));
+            EventHandlerFile::Handle(TessPrims(drawId, TS.inputPrims));
+
+            // Geometry shader: input prims, generated prims, input verts.
+            EventHandlerFile::Handle(GSInputPrims(drawId, GS.inputPrimCount));
+            EventHandlerFile::Handle(GSPrimsGen(drawId, GS.primGeneratedCount));
+            EventHandlerFile::Handle(GSVertsInput(drawId, GS.vertsInput));
+
+            // Start the next draw from zeroed counters.
+            GS = GSStats();
+            TS = TEStats();
+            CS = CStats();
+        }
+
+        /// @brief Adds one GSPrimInfo event's three counts to the geometry-shader tallies.
+        virtual void Handle(GSPrimInfo& event)
+        {
+            const auto& info = event.data;
+            GS.vertsInput += info.vertsInput;
+            GS.primGeneratedCount += info.primGeneratedCount;
+            GS.inputPrimCount += info.inputPrimCount;
+        }
+
+        /// @brief Adds (set bits of primMask) * vertsPerPrim to the clipped-vertex tally.
+        virtual void Handle(ClipVertexCount& event)
+        {
+            const auto& clip = event.data;
+            const auto primsInMask = _mm_popcnt_u32(clip.primMask);
+            CS.clippedVerts += (primsInMask * clip.vertsPerPrim);
+        }
+
+        /// @brief Adds one TessPrimCount event's primCount to the tessellation tally.
+        virtual void Handle(TessPrimCount& event)
+        {
+            const auto& tess = event.data;
+            TS.inputPrims += tess.primCount;
+        }
     };
 
     static EventManager* FromHandle(HANDLE hThreadContext)
@@ -68,13 +341,11 @@ namespace ArchRast
 
             if (type == AR_THREAD::API)
             {
-                ThreadStartApiEvent e;
-                pManager->Dispatch(e);
+                pHandler->Handle(ThreadStartApiEvent());
             }
             else
             {
-                ThreadStartWorkerEvent e;
-                pManager->Dispatch(e);
+                pHandler->Handle(ThreadStartWorkerEvent());
             }
             pHandler->MarkHeader();
 
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto 
b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 107d7a3..95cb79b 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -170,3 +170,258 @@ event BackendStatsEvent
     uint64_t CsInvocations;
 
 };
+
+event EarlyDepthStencilInfoSingleSample
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event EarlyDepthStencilInfoSampleRate
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event EarlyDepthStencilInfoNullPS
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoSingleSample
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoSampleRate
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event LateDepthStencilInfoNullPS
+{
+       uint64_t depthPassMask;
+       uint64_t stencilPassMask;
+       uint64_t coverageMask;
+};
+
+event EarlyDepthInfoPixelRate
+{
+       uint64_t depthPassCount;
+       uint64_t activeLanes;
+};
+
+
+event LateDepthInfoPixelRate
+{
+       uint64_t depthPassCount;
+       uint64_t activeLanes;
+};
+
+
+event BackendDrawEndEvent
+{
+       uint32_t drawId;
+};
+
+event FrontendDrawEndEvent
+{
+       uint32_t drawId;
+};
+
+event EarlyZSingleSample
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};     
+
+event LateZSingleSample
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyStencilSingleSample
+{
+       uint32_t drawId; 
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount; 
+};
+
+event LateStencilSingleSample
+{
+       uint32_t drawId; 
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyZSampleRate
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};     
+
+event LateZSampleRate
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyStencilSampleRate
+{
+       uint32_t drawId; 
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount; 
+};
+
+event LateStencilSampleRate
+{
+       uint32_t drawId; 
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyZNullPS
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyStencilNullPS
+{
+       uint32_t drawId; 
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount; 
+};
+
+event EarlyZPixelRate
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event LateZPixelRate
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+
+event EarlyOmZ
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event EarlyOmStencil
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event LateOmZ
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event LateOmStencil
+{
+       uint32_t drawId;
+       uint64_t passCount;
+       uint64_t failCount;
+       uint64_t testCount;
+};
+
+event GSPrimInfo
+{
+       uint64_t inputPrimCount;
+       uint64_t primGeneratedCount;
+       uint64_t vertsInput;
+};
+
+event GSInputPrims
+{
+       uint32_t drawId;
+       uint64_t inputPrimCount;
+};
+
+event GSPrimsGen
+{
+       uint32_t drawId;
+       uint64_t primGeneratedCount;
+};
+
+event GSVertsInput
+{
+       uint32_t drawId;
+       uint64_t vertsInput;
+};
+
+event ClipVertexCount
+{
+       uint64_t vertsPerPrim;
+       uint64_t primMask;
+};
+
+//REMOVE AND REPLACE
+event FlushVertClip
+{
+       uint32_t drawId;
+};
+
+event VertsClipped
+{
+       uint32_t drawId;
+       uint64_t clipCount;
+};
+
+event TessPrimCount
+{
+       uint64_t primCount;
+};
+
+//REMOVE AND REPLACE
+event TessPrimFlush
+{
+       uint32_t drawId;
+};
+
+event TessPrims
+{
+       uint32_t drawId;
+       uint64_t primCount;
+};
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index c5e6b98..16c4537 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -533,6 +533,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint3
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                      psContext.vZ, 
pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+                                       
AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     AR_END(BEEarlyDepthTest, 0);
 
                     // early-exit if no pixels passed depth or earlyZ is 
forced on
@@ -565,6 +566,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint3
                     AR_BEGIN(BELateDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                                         psContext.vZ, 
pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+                                       
AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     AR_END(BELateDepthTest, 0);
 
                     if (!_simd_movemask_ps(depthPassMask))
@@ -742,6 +744,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
                         AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                               psContext.vZ, pDepthSample, 
vCoverageMask, pStencilSample, &stencilPassMask);
+                                               
AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                         AR_END(BEEarlyDepthTest, 0);
 
                         // early-exit if no samples passed depth or earlyZ is 
forced on.
@@ -775,6 +778,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
                         AR_BEGIN(BELateDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                                               psContext.vZ, pDepthSample, 
vCoverageMask, pStencilSample, &stencilPassMask);
+                                               
AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                         AR_END(BELateDepthTest, 0);
 
                         if (!_simd_movemask_ps(depthPassMask))
@@ -923,6 +927,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t
             {
                 uint32_t depthPassCount = PixelRateZTest(activeLanes, 
psContext, BEEarlyDepthTest);
                 UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+                               
AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
             }
 
             // if we have no covered samples that passed depth at this point, 
go to next tile
@@ -956,6 +961,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t
             {
                 uint32_t depthPassCount = PixelRateZTest(activeLanes, 
psContext, BELateDepthTest);
                 UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+                               AR_EVENT(LateDepthInfoPixelRate(depthPassCount, 
_simd_movemask_ps(activeLanes)));
             }
 
             // if we have no covered samples that passed depth at this point, 
skip OM and go to next tile
@@ -1134,6 +1140,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, 
uint32_t x, uint32_t y,
                     AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
                     simdscalar depthPassMask = DepthStencilTest(&state, 
work.triFlags.frontFacing, work.triFlags.viewportIndex,
                         psContext.vZ, pDepthSample, vCoverageMask, 
pStencilSample, &stencilPassMask);
+                                       
AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), 
_simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
                     DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], 
&state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
                         pDepthSample, depthPassMask, vCoverageMask, 
pStencilSample, stencilPassMask);
                     AR_END(BEEarlyDepthTest, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index a208a36..c8dce10 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -853,7 +853,7 @@ static void GeometryShaderStage(
     // update GS pipeline stats
     UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
     UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
-
+       AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, 
numVertsPerPrim*numInputPrims));
     AR_END(FEGeometryShader, 1);
 }
 
@@ -1027,6 +1027,7 @@ static void TessellationStages(
         SWR_TS_TESSELLATED_DATA tsData = { 0 };
         AR_BEGIN(FETessellation, pDC->drawId);
         TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
+               AR_EVENT(TessPrimCount(1));
         AR_END(FETessellation, 0);
 
         if (tsData.NumPrimitives == 0)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp 
b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index f7730ff..ee12612 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -581,6 +581,7 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t 
workerId, DRAW_CONTEX
             stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], 
stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
             stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], 
stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
         ));
+               AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
 
         pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
     }
diff --git 
a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
 
b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
index 95c5442..abde3c0 100644
--- 
a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
+++ 
b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
@@ -43,7 +43,7 @@ namespace ArchRast
         virtual ~EventHandler() {}
 
 % for name in protos['event_names']:
-        virtual void Handle(${name}& event) {}
+        virtual void Handle(${name}&& event) {}
 % endfor
     };
 }
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to