On 09/19/2017 09:04 AM, Yonghong Song wrote:
[...]
Just some minor things, but a bit scattered.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 43ab5c4..2c68b9e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -582,6 +582,14 @@ union bpf_attr {
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ * read perf event counter value and perf event enabled/running time
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -638,6 +646,7 @@ union bpf_attr {
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
+ FN(perf_event_read_value), \
Nit: can you indent the '\' so it aligns with the rest of the column?
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -681,7 +690,8 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags. */
Nit: comment style, the closing */ should go on a line of its own.
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
@@ -864,4 +874,10 @@ enum {
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
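Just to sanity check the interface from the program side, this is roughly how
I'd expect a kprobe program to consume the helper (hypothetical map and attach
point; assumes a matching bpf_perf_event_read_value() declaration in the
samples' bpf_helpers.h):

#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") counters = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
        .key_size = sizeof(int),
        .value_size = sizeof(u32),
        .max_entries = 64,
};

SEC("kprobe/htab_map_get_next_key")
int bpf_prog1(struct pt_regs *ctx)
{
        struct bpf_perf_event_value buf = {};

        /* index via BPF_F_CURRENT_CPU, buffer size must match exactly */
        if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
                                      &buf, sizeof(buf)))
                return 0;

        /* buf.counter, buf.enabled and buf.running are now usable,
         * e.g. to scale the count when the event was multiplexed.
         */
        return 0;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;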
[...]
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b7..2d5bbe5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event)
* will not be local and we cannot read them atomically
* - must not have a pmu::count method
*/
-int perf_event_read_local(struct perf_event *event, u64 *value)
+int perf_event_read_local(struct perf_event *event, u64 *value,
+ u64 *enabled, u64 *running)
{
unsigned long flags;
int ret = 0;
+ u64 now;
/*
* Disabling interrupts avoids all counter scheduling (context
@@ -3718,14 +3720,21 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
goto out;
}
+ now = event->shadow_ctx_time + perf_clock();
+ if (enabled)
+ *enabled = now - event->tstamp_enabled;
/*
* If the event is currently on this CPU, its either a per-task event,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
* oncpu == -1).
*/
- if (event->oncpu == smp_processor_id())
+ if (event->oncpu == smp_processor_id()) {
event->pmu->read(event);
-
+ if (running)
+ *running = now - event->tstamp_running;
+ } else if (running) {
+ *running = event->total_time_running;
+ }
*value = local64_read(&event->count);
out:
local_irq_restore(flags);
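Not a comment on the diff itself, just for context: the enabled/running pair
is what lets a consumer scale the raw count when the event was time-multiplexed,
roughly along these lines (plain C sketch, overflow ignored for brevity;
struct bpf_perf_event_value is the one added to the uapi header above):

#include <linux/types.h>
#include <linux/bpf.h>  /* struct bpf_perf_event_value from the hunk above */

static __u64 scale_counter(const struct bpf_perf_event_value *v)
{
        /* event never ran, nothing sensible to report */
        if (!v->running)
                return 0;

        /* ran for the whole enabled window, no scaling needed */
        if (v->running >= v->enabled)
                return v->counter;

        /* multiplexed, extrapolate the count to the full enabled window */
        return v->counter * v->enabled / v->running;
}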
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b6..39ce5d9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -255,13 +255,13 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
-{
+static __always_inline int
+get_map_perf_counter(struct bpf_map *map, u64 flags,
+ u64 *value, u64 *enabled, u64 *running) {
Nit: coding style, the opening brace of the function body goes on its own line.
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct bpf_event_entry *ee;
- u64 value = 0;
int err;
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
@@ -275,7 +275,17 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
if (!ee)
return -ENOENT;
- err = perf_event_read_local(ee->event, &value);
+ err = perf_event_read_local(ee->event, value, enabled, running);
+ return err;
err can be removed entirely then, e.g.:

        return perf_event_read_local(ee->event, value, enabled, running);
+}
+
+
Nit: Two newlines slipped in.
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
+{
+ u64 value = 0;
+ int err;
+
+ err = get_map_perf_counter(map, flags, &value, NULL, NULL);
/*
* this api is ugly since we miss [-22..-2] range of valid
* counter values, but that's uapi
@@ -285,6 +295,20 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
return value;
}
Can you also move the bpf_perf_event_read_proto definition right
underneath here as we have with all other helpers?
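(To spell out the [-22..-2] comment for the archives: the old helper folds the
error code into the same u64 it returns the counter in, so a count that happens
to land in that range is ambiguous, while the new helper keeps error and data
separate. Fragment reusing the hypothetical 'counters' map from the sketch
further up:)

SEC("kprobe/some_other_func")
int bpf_prog2(struct pt_regs *ctx)
{
        struct bpf_perf_event_value buf = {};
        u64 count;

        /* old helper: counter value and error code share the return value,
         * e.g. a count equal to (u64)-ENOENT cannot be told apart from an
         * actual lookup failure
         */
        count = bpf_perf_event_read(&counters, BPF_F_CURRENT_CPU);

        /* new helper: the error code comes back separately, the data goes
         * through the buffer
         */
        if (!bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
                                       &buf, sizeof(buf)))
                count = buf.counter;

        return 0;
}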
+BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
+ struct bpf_perf_event_value *, buf, u32, size)
Nit: indent, the continuation line should align with the opening parenthesis.
+{
+ int err;
+
+ if (unlikely(size != sizeof(struct bpf_perf_event_value)))
+ return -EINVAL;
+ err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
+ &buf->running);
^ This is only indented with spaces instead of tabs.
+ if (err)
+ return err;
+ return 0;
Also here err can be removed entirely to make it less convoluted:
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
           struct bpf_perf_event_value *, eval, u32, size)
{
        if (unlikely(size != sizeof(struct bpf_perf_event_value)))
                return -EINVAL;
        return get_map_perf_counter(map, flags, &eval->counter, &eval->enabled,
                                    &eval->running);
}
+}
+
static const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
@@ -293,6 +317,16 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING,
};
+static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
+ .func = bpf_perf_event_read_value,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
If you do that, then error paths need to memset the region, e.g.
see bpf_skb_get_tunnel_opt() and bpf_skb_get_tunnel_key() helpers
which operate similarly; see the sketch right below the proto.
+ .arg4_type = ARG_CONST_SIZE,
+};
+
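To make the uninit-mem contract concrete, something along these lines would
zero the buffer on all error paths (sketch only, keeping your naming):

BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
           struct bpf_perf_event_value *, buf, u32, size)
{
        int err = -EINVAL;

        if (unlikely(size != sizeof(struct bpf_perf_event_value)))
                goto clear;
        err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
                                   &buf->running);
        if (unlikely(err))
                goto clear;
        return 0;
clear:
        memset(buf, 0, size);
        return err;
}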
static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
static __always_inline u64
@@ -499,6 +533,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_perf_event_output_proto;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
+ case BPF_FUNC_perf_event_read_value:
+ return &bpf_perf_event_read_value_proto;
default:
return tracing_func_proto(func_id);
}