bpfilter.ko module can be removed while functions of bpfilter.ko are executing. so panic can occurred. in order to protect that, locks can be used. a bpfilter_lock protects routines in the __bpfilter_process_sockopt() but it's not enough because __exit routine can be executed concurrently.
test commands: while : do iptables -I FORWARD -m string --string ap --algo kmp & modprobe -rv bpfilter & done splat looks like: [ 1074.741758] BUG: unable to handle kernel paging request at fffffbfff805540b [ 1074.741864] PGD 124327067 P4D 124327067 PUD 11c1a3067 PMD 119eaf067 PTE 0 [ 1074.755849] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 1074.755849] CPU: 0 PID: 11561 Comm: iptables Not tainted 4.20.0-rc6+ #62 [ 1074.763907] RIP: 0010:__mutex_lock+0x6b9/0x16a0 [ 1074.763907] Code: d2 00 00 e8 d9 82 ff ff 80 bd 8f fc ff ff 00 0f 85 d9 05 00 00 48 8b 85 80 fc ff ff 48 bf 00 00 00 00 00 fc ff df 48 c1 e8 03 <80> 3c 38 00 0f 85 1d 0e 00 00 48 8b 85 c8 fc ff ff 49 39 47 58 c6 [ 1074.763907] RSP: 0018:ffff88810286f7a0 EFLAGS: 00010202 [ 1074.763907] RAX: 1ffffffff805540b RBX: ffff888112760040 RCX: 0000000000000000 [ 1074.763907] RDX: 1ffff110235ff806 RSI: ffff8881127607f8 RDI: dffffc0000000000 [ 1074.763907] RBP: ffff88810286fb30 R08: 0000000080000002 R09: 0000000000000000 [ 1074.763907] R10: 0000000000000000 R11: 0000000000000000 R12: fffffbfff5288220 [ 1074.763907] R13: ffff888112760040 R14: ffff88810d421a05 R15: ffffffffc02aa000 [ 1074.763907] FS: 00007fd35394e700(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 [ 1074.763907] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1074.763907] CR2: fffffbfff805540b CR3: 000000010d182000 CR4: 00000000001006f0 [ 1074.763907] Call Trace: [ 1074.763907] ? mutex_lock_io_nested+0x1560/0x1560 [ 1074.763907] ? kasan_kmalloc+0xa0/0xd0 [ 1074.763907] ? kmem_cache_alloc+0x1c2/0x260 [ 1074.763907] ? __alloc_file+0x92/0x3c0 [ 1074.763907] ? alloc_empty_file+0x43/0x120 [ 1074.763907] ? alloc_file_pseudo+0x220/0x330 [ 1074.763907] ? sock_alloc_file+0x39/0x160 [ 1074.763907] ? __sys_socket+0x113/0x1d0 [ 1074.763907] ? __x64_sys_socket+0x6f/0xb0 [ 1074.763907] ? do_syscall_64+0x138/0x560 [ 1074.763907] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1074.763907] ? trace_hardirqs_on_caller+0x9a/0x220 [ 1074.763907] ? trace_hardirqs_off_caller+0x220/0x220 [ 1074.763907] ? __alloc_file+0x92/0x3c0 [ 1074.763907] ? __alloc_file+0x92/0x3c0 [ 1074.763907] ? __alloc_file+0x92/0x3c0 [ 1074.763907] ? cyc2ns_read_end+0x10/0x10 [ 1074.763907] ? cyc2ns_read_end+0x10/0x10 [ 1074.763907] ? hlock_class+0x140/0x140 [ 1074.763907] ? sched_clock_local+0xd4/0x140 [ 1074.763907] ? sched_clock_local+0xd4/0x140 [ 1074.763907] ? check_flags.part.35+0x440/0x440 [ 1074.763907] ? __lock_acquire+0x4f90/0x4f90 [ 1074.763907] ? perf_trace_sched_switch+0xee0/0xee0 [ 1074.763907] ? __fget_light+0xb3/0x360 [ 1074.763907] ? fget_raw+0x10/0x10 [ 1074.763907] ? bpfilter_ip_get_sockopt+0x30/0x60 [ 1074.763907] ? ip_getsockopt+0xc7/0x1a0 [ ... ] Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module") Signed-off-by: Taehee Yoo <ap420...@gmail.com> --- include/linux/bpfilter.h | 16 ++++++++++---- net/bpfilter/bpfilter_kern.c | 27 ++++++++--------------- net/ipv4/bpfilter/sockopt.c | 42 ++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index 3039361cd65a..87afd0d2b9bf 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -9,9 +9,17 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); -extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set); -extern int (*bpfilter_start_umh)(void); + +struct bpfilter_umh_ops { + int (*process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); + int (*start_umh)(void); + /* + * since ip_getsockopt() can run in parallel, serialize access to umh. + */ + struct mutex mutex; +}; +extern struct bpfilter_umh_ops bpfilter_ops; #endif diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 7397f5e8da2f..a868c9b2e9c7 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -14,8 +14,6 @@ extern char bpfilter_umh_start; extern char bpfilter_umh_end; static struct umh_info info; -/* since ip_getsockopt() can run in parallel, serialize access to umh */ -static DEFINE_MUTEX(bpfilter_lock); static void shutdown_umh(struct umh_info *info) { @@ -33,21 +31,14 @@ static void shutdown_umh(struct umh_info *info) info->pid = 0; } -static void __stop_umh(void) +static void stop_umh(void) { if (IS_ENABLED(CONFIG_INET)) { - bpfilter_process_sockopt = NULL; + bpfilter_ops.process_sockopt = NULL; shutdown_umh(&info); } } -static void stop_umh(void) -{ - mutex_lock(&bpfilter_lock); - __stop_umh(); - mutex_unlock(&bpfilter_lock); -} - static int __bpfilter_process_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set) @@ -63,13 +54,12 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.cmd = optname; req.addr = (long __force __user)optval; req.len = optlen; - mutex_lock(&bpfilter_lock); if (!info.pid) goto out; n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); - __stop_umh(); + stop_umh(); ret = -EFAULT; goto out; } @@ -77,13 +67,12 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); if (n != sizeof(reply)) { pr_err("read fail %zd\n", n); - __stop_umh(); + stop_umh(); ret = -EFAULT; goto out; } ret = reply.status; out: - mutex_unlock(&bpfilter_lock); return ret; } @@ -106,7 +95,7 @@ int start_umh(void) return -EFAULT; } if (IS_ENABLED(CONFIG_INET)) - bpfilter_process_sockopt = &__bpfilter_process_sockopt; + bpfilter_ops.process_sockopt = &__bpfilter_process_sockopt; return 0; } @@ -114,16 +103,18 @@ int start_umh(void) static int __init load_umh(void) { if (IS_ENABLED(CONFIG_INET)) - bpfilter_start_umh = &start_umh; + bpfilter_ops.start_umh = &start_umh; return start_umh(); } static void __exit fini_umh(void) { + mutex_lock(&bpfilter_ops.mutex); if (IS_ENABLED(CONFIG_INET)) - bpfilter_start_umh = NULL; + bpfilter_ops.start_umh = NULL; stop_umh(); + mutex_unlock(&bpfilter_ops.mutex); } module_init(load_umh); module_exit(fini_umh); diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index f7efcff9634d..4e377a9788ca 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -4,29 +4,35 @@ #include <uapi/linux/bpf.h> #include <linux/wait.h> #include <linux/kmod.h> +#include <linux/module.h> -int (*bpfilter_process_sockopt)(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set); -EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); - -int (*bpfilter_start_umh)(void); -EXPORT_SYMBOL_GPL(bpfilter_start_umh); +struct bpfilter_umh_ops bpfilter_ops; +EXPORT_SYMBOL_GPL(bpfilter_ops); static int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set) { - if (!bpfilter_process_sockopt) { - int err = request_module("bpfilter"); + int err; + mutex_lock(&bpfilter_ops.mutex); + if (!bpfilter_ops.process_sockopt) { + err = request_module("bpfilter"); if (err) - return err; - if (!bpfilter_process_sockopt) - if (!bpfilter_start_umh || bpfilter_start_umh()) - return -ECHILD; + goto unlock; + + if (!bpfilter_ops.process_sockopt) { + if (!bpfilter_ops.start_umh || + bpfilter_ops.start_umh()) { + err = -ECHILD; + goto unlock; + } + } } - return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); + err = bpfilter_ops.process_sockopt(sk, optname, optval, optlen, is_set); +unlock: + mutex_unlock(&bpfilter_ops.mutex); + return err; } int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, @@ -45,3 +51,11 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, return bpfilter_mbox_request(sk, optname, optval, len, false); } + +static int __init init_bpfilter_sockopt(void) +{ + mutex_init(&bpfilter_ops.mutex); + return 0; +} + +module_init(init_bpfilter_sockopt); -- 2.17.1