This is an automated email from the ASF dual-hosted git repository.
masaori pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 530ca2a99c Add bcc tool to observe pthread trylock (#11803)
530ca2a99c is described below
commit 530ca2a99c40dfb7efef79c7d3999522baad47c1
Author: Masaori Koshiba <[email protected]>
AuthorDate: Tue Oct 22 07:08:32 2024 +0900
Add bcc tool to observe pthread trylock (#11803)
* Add bcc tool to observe pthread trylock
* Cleanup
---
NOTICE | 5 +
tools/ebpf/trylock-stats/README.md | 19 +++
tools/ebpf/trylock-stats/trylock-stats.bpf.c | 211 +++++++++++++++++++++++++++
tools/ebpf/trylock-stats/trylock-stats.py | 120 +++++++++++++++
4 files changed, 355 insertions(+)
diff --git a/NOTICE b/NOTICE
index 4d04f3d960..1daf5038be 100644
--- a/NOTICE
+++ b/NOTICE
@@ -106,3 +106,8 @@ Copyright (c) 2021 Andrey Pechkurov (MIT License)
lib/systemtap/sys/sdt.h is from SystemTap. This file is dedicated to the
public domain, pursuant to CC0.
https://fossies.org/linux/systemtap/includes/sys/sdt.h
+
+~~
+
+tools/ebpf/trylock-stats is a try-lock version of lockstat-solution.py from goldshtn/linux-tracing-workshop
+Copyright (c) 2017 Sasha Goldshtein (MIT License)
diff --git a/tools/ebpf/trylock-stats/README.md b/tools/ebpf/trylock-stats/README.md
new file mode 100644
index 0000000000..ef4fadd850
--- /dev/null
+++ b/tools/ebpf/trylock-stats/README.md
@@ -0,0 +1,19 @@
+# pthread trylock stats
+
+A bcc tool to observe pthread trylock calls.
+
+## Usage
+
+```
+sudo ./trylock-stats.py -p `pgrep TS_MAIN` > trylock.log
+```
+
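+The script traces for the configured duration (10 seconds by default) and then
+prints per-thread, per-mutex statistics plus log2 histograms of wait and hold
+times. A report line looks like this (values are illustrative, not captured
+output; the format comes from trylock-stats.py below):
+
+```
+thread 140245
+    mutex #1 ::: wait time 152.40us ::: hold time 89.12us ::: enter count 320 ::: try-lock failure count 12
+```
+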
+## Requirements
+
+This tool depends on [bcc](https://github.com/iovisor/bcc).
+
+On RHEL, the following packages are required:
+
+- bcc
+- bcc-tools
+- python3-bcc
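+
+## Notes
+
+glibc 2.34 merged libpthread into libc, so the script selects the uprobe
+target library based on the glibc version reported by the libc binary. A
+quick way to check your version (assuming the script's default
+/lib64/libc.so.6 path):
+
+```
+/lib64/libc.so.6 | head -1
+```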
diff --git a/tools/ebpf/trylock-stats/trylock-stats.bpf.c b/tools/ebpf/trylock-stats/trylock-stats.bpf.c
new file mode 100644
index 0000000000..5685081f38
--- /dev/null
+++ b/tools/ebpf/trylock-stats/trylock-stats.bpf.c
@@ -0,0 +1,211 @@
+/** @file
+
+ eBPF program for trylock-stats
+
+ Based on
+ https://github.com/goldshtn/linux-tracing-workshop/blob/master/lockstat-solution.py
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <linux/ptrace.h>
+
+struct thread_mutex_key_t {
+ u32 tid;
+ u64 mtx;
+ int lock_stack_id;
+};
+
+struct thread_mutex_val_t {
+ u64 wait_time_ns;
+ u64 lock_time_ns;
+ u64 enter_count;
+ u64 fail_count; /// number of failed trylock attempts
+};
+
+struct mutex_timestamp_t {
+ u64 mtx;
+ u64 timestamp;
+};
+
+struct mutex_lock_time_key_t {
+ u32 tid;
+ u64 mtx;
+};
+
+struct mutex_lock_time_val_t {
+ u64 timestamp;
+ int stack_id;
+};
+
+// Mutex address to the stack id of the call that initialized that mutex
+BPF_HASH(init_stacks, u64, int);
+// Main info database about mutex and thread pairs
+BPF_HASH(locks, struct thread_mutex_key_t, struct thread_mutex_val_t);
+// Thread id to the mutex address and timestamp of when the wait started
+BPF_HASH(lock_start, u32, struct mutex_timestamp_t);
+// Thread id and mutex address to the timestamp of when the wait ended (mutex acquired) and the stack id
+BPF_HASH(lock_end, struct mutex_lock_time_key_t, struct mutex_lock_time_val_t);
+BPF_HASH(lock_end, struct mutex_lock_time_key_t, struct mutex_lock_time_val_t);
+// Histogram of wait times
+BPF_HISTOGRAM(mutex_wait_hist, u64);
+// Histogram of hold times
+BPF_HISTOGRAM(mutex_lock_hist, u64);
+BPF_STACK_TRACE(stacks, 4096);
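+
+// Probe flow: the entry probe stamps lock_start; the return probes fold the
+// attempt into locks (and record lock_end on success); the unlock probe
+// computes the hold time from lock_end.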
+
+int
+probe_mutex_init(struct pt_regs *ctx)
+{
+ int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK);
+ u64 mutex_addr = PT_REGS_PARM1(ctx);
+ init_stacks.update(&mutex_addr, &stack_id);
+ return 0;
+}
+
+int
+probe_mutex_lock(struct pt_regs *ctx)
+{
+ u64 now = bpf_ktime_get_ns();
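+ // bpf_get_current_pid_tgid() returns tgid << 32 | tid; the u32 cast keeps the thread id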
+ u32 pid = bpf_get_current_pid_tgid();
+ struct mutex_timestamp_t val = {};
+ val.mtx = PT_REGS_PARM1(ctx);
+ val.timestamp = now;
+ lock_start.update(&pid, &val);
+ return 0;
+}
+
+int
+probe_mutex_lock_return(struct pt_regs *ctx)
+{
+ u64 now = bpf_ktime_get_ns();
+ u32 pid = bpf_get_current_pid_tgid();
+ struct mutex_timestamp_t *entry = lock_start.lookup(&pid);
+
+ if (entry == NULL) {
+ return 0; // Missed the entry
+ }
+
+ u64 wait_time = now - entry->timestamp;
+ int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK);
+
+ // If pthread_mutex_lock() returned 0, we have the lock
+ if (PT_REGS_RC(ctx) == 0) {
+ // Record the lock acquisition timestamp so that we can read it when unlocking
+ struct mutex_lock_time_key_t key = {};
+ key.mtx = entry->mtx;
+ key.tid = pid;
+ struct mutex_lock_time_val_t val = {};
+ val.timestamp = now;
+ val.stack_id = stack_id;
+ lock_end.update(&key, &val);
+ }
+
+ // Record the wait time for this mutex-tid-stack combination even if locking failed
+ struct thread_mutex_key_t tm_key = {};
+ tm_key.mtx = entry->mtx;
+ tm_key.tid = pid;
+ tm_key.lock_stack_id = stack_id;
+ struct thread_mutex_val_t *existing_tm_val, new_tm_val = {};
+ existing_tm_val = locks.lookup_or_init(&tm_key, &new_tm_val);
+ existing_tm_val->wait_time_ns += wait_time;
+
+ if (PT_REGS_RC(ctx) == 0) {
+ existing_tm_val->enter_count += 1;
+ }
+
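+ // bucket the wait time in microseconds on a log2 scale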
+ u64 mtx_slot = bpf_log2l(wait_time / 1000);
+ mutex_wait_hist.increment(mtx_slot);
+ lock_start.delete(&pid);
+ return 0;
+}
+
+int
+probe_mutex_trylock_return(struct pt_regs *ctx)
+{
+ u64 now = bpf_ktime_get_ns();
+ u32 pid = bpf_get_current_pid_tgid();
+ struct mutex_timestamp_t *entry = lock_start.lookup(&pid);
+
+ if (entry == NULL) {
+ return 0; // Missed the entry
+ }
+
+ int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK);
+
+ // If pthread_mutex_trylock() returned 0, we have the lock
+ if (PT_REGS_RC(ctx) == 0) {
+ // Record the lock acquisition timestamp so that we can read it when unlocking
+ struct mutex_lock_time_key_t key = {};
+ key.mtx = entry->mtx;
+ key.tid = pid;
+ struct mutex_lock_time_val_t val = {};
+ val.timestamp = now;
+ val.stack_id = stack_id;
+ lock_end.update(&key, &val);
+ }
+
+ // Record the attempt for this mutex-tid-stack combination even if locking failed
+ struct thread_mutex_key_t tm_key = {};
+ tm_key.mtx = entry->mtx;
+ tm_key.tid = pid;
+ tm_key.lock_stack_id = stack_id;
+ struct thread_mutex_val_t *existing_tm_val, new_tm_val = {};
+ existing_tm_val = locks.lookup_or_init(&tm_key, &new_tm_val);
+
+ if (PT_REGS_RC(ctx) == 0) {
+ existing_tm_val->enter_count += 1;
+ } else {
+ existing_tm_val->fail_count += 1;
+ }
+
+ lock_start.delete(&pid);
+ return 0;
+}
+
+int
+probe_mutex_unlock(struct pt_regs *ctx)
+{
+ u64 now = bpf_ktime_get_ns();
+ u64 mtx = PT_REGS_PARM1(ctx);
+ u32 pid = bpf_get_current_pid_tgid();
+ struct mutex_lock_time_key_t lock_key = {};
+ lock_key.mtx = mtx;
+ lock_key.tid = pid;
+ struct mutex_lock_time_val_t *lock_val = lock_end.lookup(&lock_key);
+
+ if (lock_val == NULL) {
+ return 0; // Missed the lock of this mutex
+ }
+
+ u64 hold_time = now - lock_val->timestamp;
+ struct thread_mutex_key_t tm_key = {};
+ tm_key.mtx = mtx;
+ tm_key.tid = pid;
+ tm_key.lock_stack_id = lock_val->stack_id;
+ struct thread_mutex_val_t *existing_tm_val = locks.lookup(&tm_key);
+
+ if (existing_tm_val == NULL) {
+ return 0; // Couldn't find this record
+ }
+
+ existing_tm_val->lock_time_ns += hold_time;
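+ // bucket the hold time in microseconds on a log2 scale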
+ u64 slot = bpf_log2l(hold_time / 1000);
+ mutex_lock_hist.increment(slot);
+ lock_end.delete(&lock_key);
+ return 0;
+}
diff --git a/tools/ebpf/trylock-stats/trylock-stats.py b/tools/ebpf/trylock-stats/trylock-stats.py
new file mode 100755
index 0000000000..b16135f420
--- /dev/null
+++ b/tools/ebpf/trylock-stats/trylock-stats.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+#
+# bcc tool to observe pthread mutex trylock
+#
+# Based on https://github.com/goldshtn/linux-tracing-workshop/blob/master/lockstat-solution.py
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+import itertools
+from time import sleep
+from bcc import BPF
+from argparse import ArgumentParser
+
+BPF_SRC_FILE = "./trylock-stats.bpf.c"
+
+
+def glibc_version(glibc_path):
+ major = 0
+ minor = 0
+
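+ # executing libc.so.6 directly prints a banner like "GNU C Library ... version 2.34."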
+ result = subprocess.run([glibc_path], capture_output=True, text=True)
+
+ if result.returncode == 0 and result.stdout:
+ version_str = result.stdout.splitlines()[0].split()[-1]
+
+ version = version_str.split(".")
+ major = int(version[0])
+ minor = int(version[1])
+
+ return (major, minor)
+
+
+def attach(bpf, pid, glibc_path):
+ libname = "pthread"
+
+ # glibc merged "libpthread" into "libc" in 2.34
+ (major, minor) = glibc_version(glibc_path)
+ if (major, minor) >= (2, 34):
+ libname = "c"
+
+ # probe pthread_mutex_init as well, so init_stacks (printed in run()) gets populated
+ bpf.attach_uprobe(name=libname, sym="pthread_mutex_init", fn_name="probe_mutex_init", pid=pid)
+ bpf.attach_uprobe(name=libname, sym="pthread_mutex_trylock", fn_name="probe_mutex_lock", pid=pid)
+ bpf.attach_uretprobe(name=libname, sym="pthread_mutex_trylock", fn_name="probe_mutex_trylock_return", pid=pid)
+ bpf.attach_uprobe(name=libname, sym="pthread_mutex_unlock", fn_name="probe_mutex_unlock", pid=pid)
+
+
+def print_frame(bpf, pid, addr):
+ print("\t\t%16s (%x)" % (bpf.sym(addr, pid, show_module=True,
show_offset=True), addr))
+
+
+def print_stack(bpf, pid, stacks, stack_id):
+ for addr in stacks.walk(stack_id):
+ print_frame(bpf, pid, addr)
+
+
+def run(args):
+ pid = args.pid
+ bpf = BPF(src_file=BPF_SRC_FILE)
+ attach(bpf, pid, args.glibc_path)
+
+ init_stacks = bpf["init_stacks"]
+ stacks = bpf["stacks"]
+ locks = bpf["locks"]
+ mutex_lock_hist = bpf["mutex_lock_hist"]
+ mutex_wait_hist = bpf["mutex_wait_hist"]
+
+ sleep(args.duration)
+
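+ # give each mutex that has a recorded init stack a short "#N" alias for the report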
+ mutex_ids = {}
+ next_mutex_id = 1
+ for k, v in init_stacks.items():
+ mutex_id = "#%d" % next_mutex_id
+ next_mutex_id += 1
+ mutex_ids[k.value] = mutex_id
+ print("init stack for mutex %x (%s)" % (k.value, mutex_id))
+ print_stack(bpf, pid, stacks, v.value)
+ print("")
+
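+ # group the per-(thread, mutex, stack) records by thread id for the report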
+ grouper = lambda kv: kv[0].tid
+ sorted_by_thread = sorted(locks.items(), key=grouper)
+ locks_by_thread = itertools.groupby(sorted_by_thread, grouper)
+
+ for tid, items in locks_by_thread:
+ print("thread %d" % tid)
+
+ for k, v in sorted(items, key=lambda kv: -kv[1].fail_count):
+ mutex_descr = mutex_ids[k.mtx] if k.mtx in mutex_ids else bpf.sym(k.mtx, pid)
+ print(
+ "\tmutex %s ::: wait time %.2fus ::: hold time %.2fus :::
enter count %d ::: try-lock failure count %d" %
+ (mutex_descr, v.wait_time_ns / 1000.0, v.lock_time_ns /
1000.0, v.enter_count, v.fail_count))
+ print_stack(bpf, pid, stacks, k.lock_stack_id)
+ print("")
+
+ mutex_wait_hist.print_log2_hist(val_type="wait time (us)")
+ mutex_lock_hist.print_log2_hist(val_type="hold time (us)")
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser()
+ parser.add_argument("-p", "--pid", dest="pid", help="process id",
type=int, required=True)
+ parser.add_argument("-d", "--duration", dest="duration", help="duration to
run", default=10, type=float)
+ parser.add_argument("-l", "--glibc", dest="glibc_path", help="path to the
glibc", default="/lib64/libc.so.6")
+
+ run(parser.parse_args())