DG2 has a new feature to supports OA buffer sizes up to 128Mb by
toggling a bit in OA_DEBUG. This would eventually be a user configurable
parameter. Use OA buffer vma size in all calculations with some helpers.

v2: Let compiler decide inline (Jani)

Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
---
 drivers/gpu/drm/i915/i915_perf.c | 46 +++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 6fc4f0d8fc5a..bbf1c574f393 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -385,6 +385,21 @@ static struct ctl_table_header *sysctl_header;
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
+static u32 _oa_taken(struct i915_perf_stream * stream, u32 tail, u32 head)
+{
+       u32 size = stream->oa_buffer.vma->size;
+
+       return tail >= head ? tail - head : size - (head - tail);
+}
+
+static u32 _rewind_tail(struct i915_perf_stream * stream, u32 relative_hw_tail,
+                       u32 rewind_delta)
+{
+       return rewind_delta > relative_hw_tail ?
+              stream->oa_buffer.vma->size - (rewind_delta - relative_hw_tail) :
+              relative_hw_tail - rewind_delta;
+}
+
 void i915_oa_config_release(struct kref *ref)
 {
        struct i915_oa_config *oa_config =
@@ -487,12 +502,14 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
         * sizes need not be integral multiples or 64 or powers of 2.
         * Compute potentially partially landed report in the OA buffer
         */
-       partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+       partial_report_size =
+               _oa_taken(stream, hw_tail, stream->oa_buffer.tail);
        partial_report_size %= report_size;
 
        /* Subtract partial amount off the tail */
-       hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
-                               (stream->oa_buffer.vma->size - 1));
+       hw_tail = gtt_offset + _rewind_tail(stream,
+                                           hw_tail - gtt_offset,
+                                           partial_report_size);
 
        now = ktime_get_mono_fast_ns();
 
@@ -527,16 +544,16 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
                 * memory in the order they were written to.
                 * If not : (╯°□°)╯︵ ┻━┻
                 */
-               while (OA_TAKEN(tail, aged_tail) >= report_size) {
+               while (_oa_taken(stream, tail, aged_tail) >= report_size) {
                        u32 *report32 = (void *)(stream->oa_buffer.vaddr + 
tail);
 
                        if (report32[0] != 0 || report32[1] != 0)
                                break;
 
-                       tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
+                       tail = _rewind_tail(stream, tail, report_size);
                }
 
-               if (OA_TAKEN(hw_tail, tail) > report_size &&
+               if (_oa_taken(stream, hw_tail, tail) > report_size &&
                    __ratelimit(&stream->perf->tail_pointer_race))
                        DRM_NOTE("unlanded report(s) head=0x%x "
                                 "tail=0x%x hw_tail=0x%x\n",
@@ -547,8 +564,9 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
                stream->oa_buffer.aging_timestamp = now;
        }
 
-       pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
-                         stream->oa_buffer.head - gtt_offset) >= report_size;
+       pollin = _oa_taken(stream,
+                          stream->oa_buffer.tail,
+                          stream->oa_buffer.head) >= report_size;
 
        spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
@@ -679,11 +697,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
        int report_size = stream->oa_buffer.format_size;
        u8 *oa_buf_base = stream->oa_buffer.vaddr;
        u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
-       u32 mask = (OA_BUFFER_SIZE - 1);
        size_t start_offset = *offset;
        unsigned long flags;
-       u32 head, tail;
-       u32 taken;
+       u32 head, tail, size;
        int ret = 0;
 
        if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -693,6 +709,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 
        head = stream->oa_buffer.head;
        tail = stream->oa_buffer.tail;
+       size = stream->oa_buffer.vma->size;
 
        spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
 
@@ -711,16 +728,15 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
         * all a power of two).
         */
        if (drm_WARN_ONCE(&uncore->i915->drm,
-                         head > stream->oa_buffer.vma->size ||
-                         tail > stream->oa_buffer.vma->size,
+                         head > size || tail > size,
                          "Inconsistent OA buffer pointers: head = %u, tail = 
%u\n",
                          head, tail))
                return -EIO;
 
 
        for (/* none */;
-            (taken = OA_TAKEN(tail, head));
-            head = (head + report_size) & mask) {
+            _oa_taken(stream, tail, head);
+            head = (head + report_size) % size) {
                u8 *report = oa_buf_base + head;
                u32 *report32 = (void *)report;
                u32 ctx_id;
-- 
2.25.1

Reply via email to