Hi,

We're seeing occasional lockups on an embedded board (running an -rt
kernel), which I believe I've tracked down to the

            if (!rtnl_trylock())
                    return restart_syscall();

in net/bridge/br_sysfs_br.c. The problem is that some SCHED_FIFO task
writes a "1" to the /sys/class/net/foo/bridge/flush file, while some
lower-priority SCHED_FIFO task happens to hold rtnl_lock(). When that
happens, the higher-priority task is stuck in an eternal ERESTARTNOINTR
loop, and the lower-priority task never gets runtime and thus cannot
release the lock.

I've written a script that rather quickly reproduces this both on our
target and my desktop machine (pinning everything on one CPU to emulate
the uni-processor board), see below. Also, with this hacky patch

diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0318a69888d4..df8078c023d2 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -36,6 +36,7 @@ static ssize_t store_bridge_parm(struct device *d,
        char *endp;
        unsigned long val;
        int err;
+       static unsigned int restarts;

        if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -44,8 +45,14 @@ static ssize_t store_bridge_parm(struct device *d,
        if (endp == buf)
                return -EINVAL;

-       if (!rtnl_trylock())
-               return restart_syscall();
+       if (!rtnl_trylock()) {
+               restarts++;
+               if (restarts < 100)
+                       return restart_syscall();
+               pr_err("too many restarts, doing unconditional rtnl_lock()\n");
+               rtnl_lock();
+       }
+       restarts = 0;

        err = (*set)(br, val);
        if (!err)

priority inheritance kicks in and boosts the lower-prio thread so the
lockup doesn't happen. But I'm failing to come up with a proper solution.

Thoughts?

Thanks,
Rasmus

Reproducer:

#!/bin/bash

# Name of the scratch bridge device this script creates, exercises and deletes.
dev=br-test

flusher() {
    # Meant to run in a subshell, where $$ still reports the parent
    # shell's PID; take our own PID from the first field of
    # /proc/self/stat instead.
    local self_pid rest
    read -r self_pid rest < /proc/self/stat
    printf 'flusher: PID %s\n' "$self_pid"

    # Switch ourselves to SCHED_FIFO, priority 20.
    chrt -f -p 20 "$self_pid"

    # Forever: write "1" to the bridge's sysfs flush file, then pause.
    while : ; do
        echo 1 > "/sys/class/net/${dev}/bridge/flush"
        sleep .15
    done
    exit 0
}

worker() {
    # Meant to run in a subshell; read our own PID from the first field
    # of /proc/self/stat rather than relying on $$.
    local self_pid rest
    local addr=200.201.202.203/24
    read -r self_pid rest < /proc/self/stat
    printf 'worker: PID %s\n' "$self_pid"

    # Switch ourselves to SCHED_FIFO, priority 10.
    chrt -f -p 10 "$self_pid"

    # Forever: wait for a one-character request on fd 12, add and remove
    # a test address on the bridge, then acknowledge with "." on fd 21.
    while : ; do
        read -u 12 -n 1
        ip addr add "$addr" dev "${dev}"
        ip addr del "$addr" dev "${dev}"
        printf . >&21
    done
    exit 0
}

# Driver: pin this shell to CPU mask 1 and make it SCHED_FIFO priority 30,
# i.e. above both the flusher (20) and the worker (10).
taskset -p 1 $$
chrt -f -p 30 $$

# Two FIFOs in a private temp directory carry the request/acknowledge
# handshake between this shell and the worker.
tmpdir=$(mktemp -d)
mkfifo ${tmpdir}/a
mkfifo ${tmpdir}/b

# fd 12: requests (written here, read by worker).
# fd 21: acknowledgements (written by worker, read here).
# Both are opened read-write ("<>"), presumably so that the open does not
# block waiting for a peer -- TODO confirm.
exec 12<> ${tmpdir}/a
exec 21<> ${tmpdir}/b

# Create the bridge whose sysfs flush file the flusher writes to.
ip link add name $dev type bridge

# Start both helpers in background subshells and remember their PIDs.
( flusher ) &
flusher_pid=$!
( worker ) &
worker_pid=$!

sleep .1
printf '\n'

# Run up to 1000 handshake rounds, or stop early once /tmp/stop exists.
count=0
while ! [ -e /tmp/stop ] && [ $count -lt 1000 ]; do
    # Ask the worker for one add/del cycle ...
    echo -n . >&12
    # ... and wait at most 10 seconds for its acknowledgement.
    read -n 1 -u 21 -t 10
    if [ $? -gt 0 ] ; then
        # No acknowledgement arrived: report a suspected lockup, wait a
        # while, then give up.
        printf '\nlockup?!\n'
        sleep 20
        break
    fi
    count=$((count+1))
    # Progress counter, rewritten in place on the same terminal line.
    printf '\r%4d' $count
    sleep .02
done

# Tear down: stop both helpers and wait for them to exit.
kill $flusher_pid
kill $worker_pid

wait

# Remove the FIFOs and the test bridge.
rm -rf $tmpdir
ip link del $dev type bridge


Reply via email to