On 5/16/16, Peter Zijlstra <pet...@infradead.org> wrote: > On Mon, May 16, 2016 at 07:42:35PM +0200, Sedat Dilek wrote: > >> Unfortunately, I could not reproduce this again with none of my >> 183-kernels. >> When I first hit a "chain_key collision" issue, it was hard to redproduce, >> so. >> Any idea, how I can "force" this? > > Nope; I wish I knew, that'd be so much easier to work with :/ > > I'm hoping someone will report a reproducer, even something that > triggers once every 5-10 runs would be awesome. > > In any case, like I've explained before, nothing regressed as such, we > only added this new warning under DEBUG_LOCKDEP because we want to > better understand the condition that triggers it. > > If it bothers you, just turn off DEBUG_LOCKDEP and know that your kernel > is as reliable as it was before. OTOH, if you do keep it on, please > let me know if you can (semi) reliably trigger this, as I'd really like > to have a better understanding. >
OK, I keep checking my logs. I refreshed your patch Ingo pointed me to. But it fails like this (on top of Linux v4.6 final)... [...] if [ "" = "-pg" ]; then if [ kernel/locking/mutex-debug.o != "scripts/mod/empty.o" ]; then ./scripts/recordmcount "kernel/locking/mutex-debug.o"; fi; fi; mycompiler -Wp,-MD,kernel/locking/.lockdep.o.d -nostdinc -isystem /usr/lib/gcc/x86_64-linux-gnu/4.9/include -nostdinc -isystem /usr/lib/gcc/x86_64-linux-gnu/4.9/include -I./arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -Iinclude -I./arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I./include/uapi -Iinclude/generated/uapi -include ./include/linux/kconfig.h -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Werror-implicit-function-declaration -Wno-format-security -std=gnu89 -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mtune=generic -mno-red-zone -mcmodel=kernel -funit-at-a-time -maccumulate-outgoing-args -DCONFIG_X86_X32_ABI -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -DCONFIG_AS_CFI_SECTIONS=1 -DCONFIG_AS_FXSAVEQ=1 -DCONFIG_AS_SSSE3=1 -DCONFIG_AS_CRC32=1 -DCONFIG_AS_AVX=1 -DCONFIG_AS_AVX2=1 -pipe -Wno-sign-compare -fno-asynchronous-unwind-tables -fno-delete-null-pointer-checks -O2 --param=allow-store-data-races=0 -Wframe-larger-than=1024 -fno-stack-protector -Wno-unused-but-set-variable -fno-omit-frame-pointer -fno-optimize-sibling-calls -fno-var-tracking-assignments -mfentry -DCC_USING_FENTRY -Wdeclaration-after-statement -Wno-pointer-sign -fno-strict-overflow -fconserve-stack -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time -DCC_HAVE_ASM_GOTO -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(lockdep)" -D"KBUILD_MODNAME=KBUILD_STR(lockdep)" -c -o kernel/locking/.tmp_lockdep.o kernel/locking/lockdep.c kernel/locking/lockdep.c: In function 'print_chain_keys_held_locks': kernel/locking/lockdep.c:2034:2: error: too few arguments to function 'print_chain_key_iteration' print_chain_key_iteration(hlock_next->class_idx, chain_key); ^ kernel/locking/lockdep.c:2006:12: note: declared here static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key) ^ make[4]: *** [kernel/locking/lockdep.o] Error 1 make[3]: *** [kernel/locking] Error 2 make[2]: *** [kernel] Error 2 [...] - Sedat -
From b953be255bfb46970c75950e297be836577bc525 Mon Sep 17 00:00:00 2001 From: Sedat Dilek <sedat.di...@gmail.com> Date: Mon, 16 May 2016 15:51:04 +0200 Subject: [PATCH] locking/lockdep: Some more additional chain_key collision information From: Peter Zijlstra <pet...@infradead.org> For more details see thread "[v4.6-rc7-183-g1410b74e4061]" at LKML [1]. Patch for testing from Peter Zijlstra see [2] and [3]. [1] http://marc.info/?t=146321784300002&r=1&w=2 [2] http://marc.info/?l=linux-kernel&m=146339587506110&w=2 [3] https://lkml.org/lkml/2016/5/10/214 Cc: Wanpeng Li <wanpeng...@linux.intel.com> Cc: Alfredo Alvarez Fernandez <alfredoalvarezfernan...@gmail.com> Cc: Peter Zijlstra (Intel) <pet...@infradead.org> Cc: Linus Torvalds <torva...@linux-foundation.org> Cc: Thomas Gleixner <t...@linutronix.de> Cc: Ingo Molnar <mi...@kernel.org> Cc: Sedat Dilek <sedat.di...@gmail.com> Cc: Ted Tso <ty...@mit.edu> Cc: LKML <linux-ker...@vger.kernel.org> Cc: the arch/x86 maintainers <x...@kernel.org> Cc: linux-fsde...@vger.kernel.org --- kernel/locking/lockdep.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 78c1c0ee6dc1..5dc21eb101b0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2003,13 +2003,14 @@ static inline int get_first_held_lock(struct task_struct *curr, /* * Returns the next chain_key iteration */ -static u64 print_chain_key_iteration(int class_idx, u64 chain_key) +static u64 print_chain_key_iteration(int class_idx, u64 chain_key, u64 prev_key) { u64 new_chain_key = iterate_chain_key(chain_key, class_idx); - printk(" class_idx:%d -> chain_key:%016Lx", + printk(" class_idx: %d -> chain_key: %016Lx (%016Lx)", class_idx, - (unsigned long long)new_chain_key); + (unsigned long long)new_chain_key, + (unsigned long long)prev_key); return new_chain_key; } @@ -2024,7 +2025,8 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne printk("depth: %u\n", depth + 1); for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) { hlock = curr->held_locks + i; - chain_key = print_chain_key_iteration(hlock->class_idx, chain_key); + chain_key = print_chain_key_iteration(hlock->class_idx, chain_key, + hlock->prev_chain_key); print_lock(hlock); } @@ -2042,7 +2044,7 @@ static void print_chain_keys_chain(struct lock_chain *chain) printk("depth: %u\n", chain->depth); for (i = 0; i < chain->depth; i++) { class_id = chain_hlocks[chain->base + i]; - chain_key = print_chain_key_iteration(class_id + 1, chain_key); + chain_key = print_chain_key_iteration(class_id + 1, chain_key, 0); print_lock_name(lock_classes + class_id); printk("\n"); @@ -2051,7 +2053,8 @@ static void print_chain_keys_chain(struct lock_chain *chain) static void print_collision(struct task_struct *curr, struct held_lock *hlock_next, - struct lock_chain *chain) + struct lock_chain *chain, + u64 chain_key) { printk("\n"); printk("======================\n"); @@ -2060,11 +2063,14 @@ static void print_collision(struct task_struct *curr, printk("----------------------\n"); printk("%s/%d: ", current->comm, task_pid_nr(current)); printk("Hash chain already cached but the contents don't match!\n"); + printk("chain->chain_key: %016Lx chain_key: %016Lx\n", + (unsigned long long)chain->chain_key, + (unsigned long long)chain_key); - printk("Held locks:"); + printk("Held locks: "); print_chain_keys_held_locks(curr, hlock_next); - printk("Locks in cached chain:"); + printk("Locks in cached chain: "); print_chain_keys_chain(chain); printk("\nstack backtrace:\n"); @@ -2080,7 +2086,8 @@ static void print_collision(struct task_struct *curr, */ static int check_no_collision(struct task_struct *curr, struct held_lock *hlock, - struct lock_chain *chain) + struct lock_chain *chain, + u64 chain_key) { #ifdef CONFIG_DEBUG_LOCKDEP int i, j, id; @@ -2088,7 +2095,7 @@ static int check_no_collision(struct task_struct *curr, i = get_first_held_lock(curr, hlock); if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) { - print_collision(curr, hlock, chain); + print_collision(curr, hlock, chain, chain_key); return 0; } @@ -2096,7 +2103,7 @@ static int check_no_collision(struct task_struct *curr, id = curr->held_locks[i].class_idx - 1; if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { - print_collision(curr, hlock, chain); + print_collision(curr, hlock, chain, chain_key); return 0; } } @@ -2134,7 +2141,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); - if (!check_no_collision(curr, hlock, chain)) + if (!check_no_collision(curr, hlock, chain, chain_key)) return 0; if (very_verbose(class)) -- 2.8.2