I currently have something like the below on top, is that correct?

If so, I'll fold it back in.


--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -563,16 +563,17 @@ int x86_pmu_hw_config(struct perf_event
        /* sample_regs_user never support XMM registers */
        if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
                return -EINVAL;
+
        /*
         * Besides the general purpose registers, XMM registers may
         * be collected in PEBS on some platforms, e.g. Icelake
         */
        if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
-               if (!is_sampling_event(event) ||
-                   !event->attr.precise_ip ||
-                   x86_pmu.pebs_no_xmm_regs)
+               if (x86_pmu.pebs_no_xmm_regs)
                        return -EINVAL;
 
+               if (!event->attr.precise_ip)
+                       return -EINVAL;
        }
 
        return x86_setup_perfctr(event);
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3428,7 +3428,7 @@ icl_get_event_constraints(struct cpu_hw_
         * Force instruction:ppp in Fixed counter 0
         */
        if ((event->attr.precise_ip == 3) &&
-           ((event->hw.config & X86_RAW_EVENT_MASK) == 0x00c0))
+           (event->hw.config == X86_CONFIG(.event=0xc0)))
                return &fixed_counter0_constraint;
 
        return hsw_get_event_constraints(cpuc, idx, event);
@@ -4810,7 +4810,7 @@ __init int intel_pmu_init(void)
                        hsw_format_attr : nhm_format_attr;
                extra_attr = merge_attr(extra_attr, skl_format_attr);
                x86_pmu.cpu_events = get_icl_events_attrs();
-               x86_pmu.force_gpr_event = 0x2ca;
+               x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
                x86_pmu.lbr_pt_coexist = true;
                intel_pmu_pebs_data_source_skl(false);
                pr_cont("Icelake events, ");
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -853,13 +853,13 @@ struct event_constraint intel_icl_pebs_e
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL),  /* SLOTS */
 
-       INTEL_PLD_CONSTRAINT(0x1cd, 0xff),              /* MEM_TRANS_RETIRED.LOAD_LATENCY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),  /* MEM_INST_RETIRED.LOAD */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),  /* MEM_INST_RETIRED.STORE */
+       INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
 
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
 
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),             /* MEM_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
 
        /*
         * Everything else is handled by PMU_FL_PEBS_ALL, because we
@@ -963,40 +963,42 @@ static u64 pebs_update_adaptive_cfg(stru
        u64 pebs_data_cfg = 0;
        bool gprs, tsx_weight;
 
-       if ((sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) ||
-           attr->precise_ip < 2) {
+       if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
+           attr->precise_ip > 1)
+               return pebs_data_cfg;
 
-               if (sample_type & PERF_PEBS_MEMINFO_TYPE)
-                       pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
+       if (sample_type & PERF_PEBS_MEMINFO_TYPE)
+               pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
 
+       /*
+        * We need GPRs when:
+        * + user requested them
+        * + precise_ip < 2 for the non event IP
+        * + For RTM TSX weight we need GPRs for the abort code.
+        */
+       gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
+              (attr->sample_regs_intr & PEBS_GPRS_REGS);
+
+       tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
+                    ((attr->config & INTEL_ARCH_EVENT_MASK) ==
+                     x86_pmu.rtm_abort_event);
+
+       if (gprs || (attr->precise_ip < 2) || tsx_weight)
+               pebs_data_cfg |= PEBS_DATACFG_GPRS;
+
+       if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+           (attr->sample_regs_intr & PEBS_XMM_REGS))
+               pebs_data_cfg |= PEBS_DATACFG_XMMS;
+
+       if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
                /*
-                * Cases we need the registers:
-                * + user requested registers
-                * + precise_ip < 2 for the non event IP
-                * + For RTM TSX weight we need GPRs too for the abort
-                * code. But we don't want to force GPRs for all other
-                * weights.  So add it only collectfor the RTM abort event.
+                * For now always log all LBRs. Could configure this
+                * later.
                 */
-               gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
-                             (attr->sample_regs_intr & 0xffffffff);
-               tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
-                            ((attr->config & 0xffff) == x86_pmu.force_gpr_event);
-               if (gprs || (attr->precise_ip < 2) || tsx_weight)
-                       pebs_data_cfg |= PEBS_DATACFG_GPRS;
-
-               if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
-                   (attr->sample_regs_intr >> 32))
-                       pebs_data_cfg |= PEBS_DATACFG_XMMS;
-
-               if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
-                       /*
-                        * For now always log all LBRs. Could configure this
-                        * later.
-                        */
-                       pebs_data_cfg |= PEBS_DATACFG_LBRS |
-                               ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
-               }
+               pebs_data_cfg |= PEBS_DATACFG_LBRS |
+                       ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
        }
+
        return pebs_data_cfg;
 }
 
@@ -1022,13 +1024,8 @@ pebs_update_state(bool needed_cb, struct
        }
 
        /*
-        * The PEBS record doesn't shrink on the del. Because to get
-        * an accurate config needs to go through all the existing pebs events.
-        * It's not necessary.
-        * There is no harmful for a bigger PEBS record, except little
-        * performance impacts.
-        * Also, for most cases, the same pebs config is applied for all
-        * pebs events.
+        * The PEBS record doesn't shrink on pmu::del(). Doing so would require
+        * iterating all remaining PEBS events to reconstruct the config.
         */
        if (x86_pmu.intel_cap.pebs_baseline && add) {
                u64 pebs_data_cfg;
@@ -1076,8 +1073,7 @@ void intel_pmu_pebs_enable(struct perf_e
 
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
-       if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
-           (x86_pmu.version < 5))
+       if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;
@@ -1766,8 +1762,7 @@ static void intel_pmu_drain_pebs_core(st
                               setup_pebs_fixed_sample_data);
 }
 
-static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc,
-                                                int size)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
 {
        struct perf_event *event;
        int bit;
@@ -1826,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(str
 
                /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
-                       for_each_set_bit(bit, (unsigned long *)&pebs_status,
-                                        size)
+                       for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
                                counts[bit]++;
 
                        continue;
@@ -1866,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(str
                 * If collision happened, the record will be dropped.
                 */
                if (p->status != (1ULL << bit)) {
-                       for_each_set_bit(i, (unsigned long *)&pebs_status,
-                                        x86_pmu.max_pebs_events)
+                       for_each_set_bit(i, (unsigned long *)&pebs_status, size)
                                error[i]++;
                        continue;
                }
@@ -1875,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(str
                counts[bit]++;
        }
 
-       for (bit = 0; bit < size; bit++) {
+       for_each_set_bit(bit, (unsigned long *)&mask, size) {
                if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;
 
@@ -1939,7 +1932,7 @@ static void intel_pmu_drain_pebs_icl(str
                        counts[bit]++;
        }
 
-       for (bit = 0; bit < size; bit++) {
+       for_each_set_bit(bit, (unsigned long *)&mask, size) {
                if (counts[bit] == 0)
                        continue;
 
@@ -1980,6 +1973,9 @@ void __init intel_ds_init(void)
                char *pebs_qual = "";
                int format = x86_pmu.intel_cap.pebs_format;
 
+               if (format < 4)
+                       x86_pmu.intel_cap.pebs_baseline = 0;
+
                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -2042,8 +2038,6 @@ void __init intel_ds_init(void)
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
                }
-               if (format != 4)
-                       x86_pmu.intel_cap.pebs_baseline = 0;
        }
 }
 
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -303,8 +303,8 @@ struct cpu_hw_events {
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
 /*
- * Only works for Intel events, which has 'small' event codes.
- * Need to fix the rang compare for 'big' event codes, e.g AMD64_EVENTSEL_EVENT
+ * The constraint_match() function only works for 'simple' event codes
+ * and not for extended (AMD64_EVENTSEL_EVENT) events codes.
  */
 #define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
        __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
@@ -672,12 +672,12 @@ struct x86_pmu {
                        pebs_no_xmm_regs        :1;
        int             pebs_record_size;
        int             pebs_buffer_size;
+       int             max_pebs_events;
        void            (*drain_pebs)(struct pt_regs *regs);
        struct event_constraint *pebs_constraints;
        void            (*pebs_aliases)(struct perf_event *event);
-       int             max_pebs_events;
        unsigned long   large_pebs_flags;
-       u64             force_gpr_event;
+       u64             rtm_abort_event;
 
        /*
         * Intel LBR

Reply via email to