This is an automated email from the ASF dual-hosted git repository.

masaori pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 530ca2a99c Add bcc tool to observe pthread trylock (#11803)
530ca2a99c is described below

commit 530ca2a99c40dfb7efef79c7d3999522baad47c1
Author: Masaori Koshiba <[email protected]>
AuthorDate: Tue Oct 22 07:08:32 2024 +0900

    Add bcc tool to observe pthread trylock (#11803)
    
    * Add bcc tool to observe pthread trylock
    
    * Cleanup
---
 NOTICE                                       |   5 +
 tools/ebpf/trylock-stats/README.md           |  19 +++
 tools/ebpf/trylock-stats/trylock-stats.bpf.c | 211 +++++++++++++++++++++++++++
 tools/ebpf/trylock-stats/trylock-stats.py    | 120 +++++++++++++++
 4 files changed, 355 insertions(+)

diff --git a/NOTICE b/NOTICE
index 4d04f3d960..1daf5038be 100644
--- a/NOTICE
+++ b/NOTICE
@@ -106,3 +106,8 @@ Copyright (c) 2021 Andrey Pechkurov (MIT License)
 
 lib/systemtap/sys/sdt.h is from SystemTap.  This file is dedicated to the 
public domain, pursuant to CC0.
 https://fossies.org/linux/systemtap/includes/sys/sdt.h
+
+~~
+
+tools/ebpf/trylock-stats is a try-lock version of lockstat-solution.py from goldshtn/linux-tracing-workshop
+Copyright (c) 2017 Sasha Goldshtein (MIT License)
diff --git a/tools/ebpf/trylock-stats/README.md 
b/tools/ebpf/trylock-stats/README.md
new file mode 100644
index 0000000000..ef4fadd850
--- /dev/null
+++ b/tools/ebpf/trylock-stats/README.md
@@ -0,0 +1,19 @@
+# pthread trylock stats
+
+A bcc tool to observe pthread mutex trylock behavior.
+
+# Usage
+
+```
+sudo ./trylock-stats.py -p `pgrep TS_MAIN` > trylock.log
+```
+
+# Requirements
+
+This tool depends on [bcc](https://github.com/iovisor/bcc).
+
+On RHEL, the following packages are required.
+
+- bcc
+- bcc-tools
+- python3-bcc
diff --git a/tools/ebpf/trylock-stats/trylock-stats.bpf.c 
b/tools/ebpf/trylock-stats/trylock-stats.bpf.c
new file mode 100644
index 0000000000..5685081f38
--- /dev/null
+++ b/tools/ebpf/trylock-stats/trylock-stats.bpf.c
@@ -0,0 +1,211 @@
+/** @file
+
+  eBPF program for trylock-stats
+
+  Based on
+  
https://github.com/goldshtn/linux-tracing-workshop/blob/master/lockstat-solution.py
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include <linux/ptrace.h>
+
+struct thread_mutex_key_t {
+  u32 tid;
+  u64 mtx;
+  int lock_stack_id;
+};
+
+struct thread_mutex_val_t {
+  u64 wait_time_ns;
+  u64 lock_time_ns;
+  u64 enter_count;
+  u64 fail_count; /// failure of try lock
+};
+
+struct mutex_timestamp_t {
+  u64 mtx;
+  u64 timestamp;
+};
+
+struct mutex_lock_time_key_t {
+  u32 tid;
+  u64 mtx;
+};
+
+struct mutex_lock_time_val_t {
+  u64 timestamp;
+  int stack_id;
+};
+
+// Mutex to the stack id which initialized that mutex
+BPF_HASH(init_stacks, u64, int);
+// Main info database about mutex and thread pairs
+BPF_HASH(locks, struct thread_mutex_key_t, struct thread_mutex_val_t);
+// PID to the mutex address and timestamp of when the wait started
+BPF_HASH(lock_start, u32, struct mutex_timestamp_t);
+// PID and mutex address to the timestamp of when the wait ended (mutex 
acquired) and the stack id
+BPF_HASH(lock_end, struct mutex_lock_time_key_t, struct mutex_lock_time_val_t);
+// Histogram of wait times
+BPF_HISTOGRAM(mutex_wait_hist, u64);
+// Histogram of hold times
+BPF_HISTOGRAM(mutex_lock_hist, u64);
+BPF_STACK_TRACE(stacks, 4096);
+
+int
+probe_mutex_init(struct pt_regs *ctx)
+{
+  int stack_id   = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | 
BPF_F_USER_STACK);
+  u64 mutex_addr = PT_REGS_PARM1(ctx);
+  init_stacks.update(&mutex_addr, &stack_id);
+  return 0;
+}
+
+int
+probe_mutex_lock(struct pt_regs *ctx)
+{
+  u64                      now = bpf_ktime_get_ns();
+  u32                      pid = bpf_get_current_pid_tgid();
+  struct mutex_timestamp_t val = {};
+  val.mtx                      = PT_REGS_PARM1(ctx);
+  val.timestamp                = now;
+  lock_start.update(&pid, &val);
+  return 0;
+}
+
+int
+probe_mutex_lock_return(struct pt_regs *ctx)
+{
+  u64                       now   = bpf_ktime_get_ns();
+  u32                       pid   = bpf_get_current_pid_tgid();
+  struct mutex_timestamp_t *entry = lock_start.lookup(&pid);
+
+  if (entry == NULL) {
+    return 0; // Missed the entry
+  }
+
+  u64 wait_time = now - entry->timestamp;
+  int stack_id  = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | 
BPF_F_USER_STACK);
+
+  // If pthread_mutex_lock() returned 0, we have the lock
+  if (PT_REGS_RC(ctx) == 0) {
+    // Record the lock acquisition timestamp so that we can read it when 
unlocking
+    struct mutex_lock_time_key_t key = {};
+    key.mtx                          = entry->mtx;
+    key.tid                          = pid;
+    struct mutex_lock_time_val_t val = {};
+    val.timestamp                    = now;
+    val.stack_id                     = stack_id;
+    lock_end.update(&key, &val);
+  }
+
+  // Record the wait time for this mutex-tid-stack combination even if locking 
failed
+  struct thread_mutex_key_t tm_key = {};
+  tm_key.mtx                       = entry->mtx;
+  tm_key.tid                       = pid;
+  tm_key.lock_stack_id             = stack_id;
+  struct thread_mutex_val_t *existing_tm_val, new_tm_val = {};
+  existing_tm_val                = locks.lookup_or_init(&tm_key, &new_tm_val);
+  existing_tm_val->wait_time_ns += wait_time;
+
+  if (PT_REGS_RC(ctx) == 0) {
+    existing_tm_val->enter_count += 1;
+  }
+
+  u64 mtx_slot = bpf_log2l(wait_time / 1000);
+  mutex_wait_hist.increment(mtx_slot);
+  lock_start.delete(&pid);
+  return 0;
+}
+
+int
+probe_mutex_trylock_return(struct pt_regs *ctx)
+{
+  u64                       now   = bpf_ktime_get_ns();
+  u32                       pid   = bpf_get_current_pid_tgid();
+  struct mutex_timestamp_t *entry = lock_start.lookup(&pid);
+
+  if (entry == NULL) {
+    return 0; // Missed the entry
+  }
+
+  int stack_id = stacks.get_stackid(ctx, BPF_F_REUSE_STACKID | 
BPF_F_USER_STACK);
+
+  // If pthread_mutex_lock() returned 0, we have the lock
+  if (PT_REGS_RC(ctx) == 0) {
+    // Record the lock acquisition timestamp so that we can read it when 
unlocking
+    struct mutex_lock_time_key_t key = {};
+    key.mtx                          = entry->mtx;
+    key.tid                          = pid;
+    struct mutex_lock_time_val_t val = {};
+    val.timestamp                    = now;
+    val.stack_id                     = stack_id;
+    lock_end.update(&key, &val);
+  }
+
+  // Record the wait time for this mutex-tid-stack combination even if locking 
failed
+  struct thread_mutex_key_t tm_key = {};
+  tm_key.mtx                       = entry->mtx;
+  tm_key.tid                       = pid;
+  tm_key.lock_stack_id             = stack_id;
+  struct thread_mutex_val_t *existing_tm_val, new_tm_val = {};
+  existing_tm_val = locks.lookup_or_init(&tm_key, &new_tm_val);
+
+  if (PT_REGS_RC(ctx) == 0) {
+    existing_tm_val->enter_count += 1;
+  } else {
+    existing_tm_val->fail_count += 1;
+  }
+
+  lock_start.delete(&pid);
+  return 0;
+}
+
+int
+probe_mutex_unlock(struct pt_regs *ctx)
+{
+  u64                          now       = bpf_ktime_get_ns();
+  u64                          mtx       = PT_REGS_PARM1(ctx);
+  u32                          pid       = bpf_get_current_pid_tgid();
+  struct mutex_lock_time_key_t lock_key  = {};
+  lock_key.mtx                           = mtx;
+  lock_key.tid                           = pid;
+  struct mutex_lock_time_val_t *lock_val = lock_end.lookup(&lock_key);
+
+  if (lock_val == NULL) {
+    return 0; // Missed the lock of this mutex
+  }
+
+  u64                       hold_time        = now - lock_val->timestamp;
+  struct thread_mutex_key_t tm_key           = {};
+  tm_key.mtx                                 = mtx;
+  tm_key.tid                                 = pid;
+  tm_key.lock_stack_id                       = lock_val->stack_id;
+  struct thread_mutex_val_t *existing_tm_val = locks.lookup(&tm_key);
+
+  if (existing_tm_val == NULL) {
+    return 0; // Couldn't find this record
+  }
+
+  existing_tm_val->lock_time_ns += hold_time;
+  u64 slot                       = bpf_log2l(hold_time / 1000);
+  mutex_lock_hist.increment(slot);
+  lock_end.delete(&lock_key);
+  return 0;
+}
diff --git a/tools/ebpf/trylock-stats/trylock-stats.py 
b/tools/ebpf/trylock-stats/trylock-stats.py
new file mode 100755
index 0000000000..b16135f420
--- /dev/null
+++ b/tools/ebpf/trylock-stats/trylock-stats.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+#
+#  bcc tool to observe pthread mutex trylock
+#
+#  Based on 
https://github.com/goldshtn/linux-tracing-workshop/blob/master/lockstat-solution.py
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import os
+import subprocess
+import sys
+import itertools
+from time import sleep
+from bcc import BPF
+from argparse import ArgumentParser
+
+BPF_SRC_FILE = "./trylock-stats.bpf.c"
+
+
+def glibc_version(glibc_path):
+    major = 0
+    minor = 0
+
+    result = subprocess.run([glibc_path], capture_output=True, text=True)
+
+    if result:
+        version_str = result.stdout.splitlines()[0].split()[-1]
+
+        version = version_str.split(".")
+        major = int(version[0])
+        minor = int(version[1])
+
+    return (major, minor)
+
+
+def attach(bpf, pid, glibc_path):
+    libname = "pthread"
+
+    # glibc removed "libpthread" from 2.34
+    (major, minor) = glibc_version(glibc_path)
+    if major >= 2 and minor >= 34:
+        libname = "c"
+
+    bpf.attach_uprobe(name=libname, sym="pthread_mutex_trylock", 
fn_name="probe_mutex_lock", pid=pid)
+    bpf.attach_uretprobe(name=libname, sym="pthread_mutex_trylock", 
fn_name="probe_mutex_trylock_return", pid=pid)
+    bpf.attach_uprobe(name=libname, sym="pthread_mutex_unlock", 
fn_name="probe_mutex_unlock", pid=pid)
+
+
+def print_frame(bpf, pid, addr):
+    print("\t\t%16s (%x)" % (bpf.sym(addr, pid, show_module=True, 
show_offset=True), addr))
+
+
+def print_stack(bpf, pid, stacks, stack_id):
+    for addr in stacks.walk(stack_id):
+        print_frame(bpf, pid, addr)
+
+
+def run(args):
+    pid = args.pid
+    bpf = BPF(src_file=BPF_SRC_FILE)
+    attach(bpf, pid, args.glibc_path)
+
+    init_stacks = bpf["init_stacks"]
+    stacks = bpf["stacks"]
+    locks = bpf["locks"]
+    mutex_lock_hist = bpf["mutex_lock_hist"]
+    mutex_wait_hist = bpf["mutex_wait_hist"]
+
+    sleep(args.duration)
+
+    mutex_ids = {}
+    next_mutex_id = 1
+    for k, v in init_stacks.items():
+        mutex_id = "#%d" % next_mutex_id
+        next_mutex_id += 1
+        mutex_ids[k.value] = mutex_id
+        print("init stack for mutex %x (%s)" % (k.value, mutex_id))
+        print_stack(bpf, pid, stacks, v.value)
+        print("")
+
+    grouper = lambda kv: kv[0].tid
+    sorted_by_thread = sorted(locks.items(), key=grouper)
+    locks_by_thread = itertools.groupby(sorted_by_thread, grouper)
+
+    for tid, items in locks_by_thread:
+        print("thread %d" % tid)
+
+        for k, v in sorted(items, key=lambda kv: -kv[1].fail_count):
+            mutex_descr = mutex_ids[k.mtx] if k.mtx in mutex_ids else 
bpf.sym(k.mtx, pid)
+            print(
+                "\tmutex %s ::: wait time %.2fus ::: hold time %.2fus ::: 
enter count %d ::: try-lock failure count %d" %
+                (mutex_descr, v.wait_time_ns / 1000.0, v.lock_time_ns / 
1000.0, v.enter_count, v.fail_count))
+            print_stack(bpf, pid, stacks, k.lock_stack_id)
+            print("")
+
+    mutex_wait_hist.print_log2_hist(val_type="wait time (us)")
+    mutex_lock_hist.print_log2_hist(val_type="hold time (us)")
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("-p", "--pid", dest="pid", help="process id", 
type=int, required=True)
+    parser.add_argument("-d", "--duration", dest="duration", help="duration to 
run", default=10, type=float)
+    parser.add_argument("-l", "--glibc", dest="glibc_path", help="path to the 
glibc", default="/lib64/libc.so.6")
+
+    run(parser.parse_args())

Reply via email to