Hi, We're seeing occasional lockups on an embedded board (running an -rt kernel), which I believe I've tracked down to the
if (!rtnl_trylock()) return restart_syscall(); in net/bridge/br_sysfs_br.c. The problem is that some SCHED_FIFO task writes a "1" to the /sys/class/net/foo/bridge/flush file, while some lower-priority SCHED_FIFO task happens to hold rtnl_lock(). When that happens, the higher-priority task is stuck in an eternal ERESTARTNOINTR loop, and the lower-priority task never gets runtime and thus cannot release the lock. I've written a script that rather quickly reproduces this both on our target and my desktop machine (pinning everything on one CPU to emulate the uni-processor board), see below. Also, with this hacky patch diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0318a69888d4..df8078c023d2 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -36,6 +36,7 @@ static ssize_t store_bridge_parm(struct device *d, char *endp; unsigned long val; int err; + static unsigned int restarts; if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -44,8 +45,14 @@ static ssize_t store_bridge_parm(struct device *d, if (endp == buf) return -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); + if (!rtnl_trylock()) { + restarts++; + if (restarts < 100) + return restart_syscall(); + pr_err("too many restarts, doing unconditional rtnl_lock()\n"); + rtnl_lock(); + } + restarts = 0; err = (*set)(br, val); if (!err) priority inheritance kicks in and boosts the lower-prio thread so the lockup doesn't happen. But I'm failing to come up with a proper solution. Thoughts? Thanks, Rasmus Reproducer: #!/bin/bash dev=br-test flusher() { # $$ doesn't work as expected in subshells read -r pid _ < /proc/self/stat echo "flusher: PID $pid" chrt -f -p 20 $pid while true ; do echo 1 > /sys/class/net/${dev}/bridge/flush sleep .15 done exit 0 } worker() { read -r pid _ < /proc/self/stat echo "worker: PID $pid" chrt -f -p 10 $pid while true ; do read -n 1 -u 12 ip addr add 200.201.202.203/24 dev ${dev} ip addr del 200.201.202.203/24 dev ${dev} echo -n . >&21 done exit 0 } taskset -p 1 $$ chrt -f -p 30 $$ tmpdir=$(mktemp -d) mkfifo ${tmpdir}/a mkfifo ${tmpdir}/b exec 12<> ${tmpdir}/a exec 21<> ${tmpdir}/b ip link add name $dev type bridge ( flusher ) & flusher_pid=$! ( worker ) & worker_pid=$! sleep .1 printf '\n' count=0 while ! [ -e /tmp/stop ] && [ $count -lt 1000 ]; do echo -n . >&12 read -n 1 -u 21 -t 10 if [ $? -gt 0 ] ; then printf '\nlockup?!\n' sleep 20 break fi count=$((count+1)) printf '\r%4d' $count sleep .02 done kill $flusher_pid kill $worker_pid wait rm -rf $tmpdir ip link del $dev type bridge