On 18/5/26 23:40, [email protected] wrote: >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c >> index 83de8fb9b9aa..322865a88b3a 100644 >> --- a/kernel/bpf/syscall.c >> +++ b/kernel/bpf/syscall.c >> @@ -1647,11 +1647,18 @@ static int map_create(union bpf_attr *attr, bpfptr_t >> uattr, struct bpf_common_at >> >> /* preserve original error even if log finalization is successful */ >> ret = bpf_log_attr_finalize(&attr_log, log); >> - if (ret) { >> - if (err >= 0) >> - close_fd(err); >> + if (ret && err < 0) >> + /* >> + * Failed to finalize the log. >> + * Should not close_fd(err) here. Since the bpf_map_new_fd() >> + * has published the map fd, if a concurrent thread closes the >> + * fd, then opens new, unrelated file that receives the exact >> + * same fd number, close_fd(err) might inadvertently close the >> + * unrelated file. >> + * As a trade-off, override the err only when failed to finalize >> + * the log and failed to create map. >> + */ >> err = ret; > > In the v13 review thread, Alexei Starovoitov suggested moving > bpf_log_attr_finalize() earlier in the flow to eliminate the race > window entirely: > > "bot is correct. Let's avoid these races. Pls move > bpf_log_attr_finalize() into map_create and do it before > security_bpf_map_create." > > > https://lore.kernel.org/bpf/CAADnVQ+XR3kyqizGgGhtG5xiBu3oK3O+COUMfPDQDBSJbm=5...@mail.gmail.com/ > > Does the current approach fully address the race concern? While removing > close_fd() prevents the specific file descriptor hijacking issue, the > suggestion was to finalize the log before security_bpf_map_create(), > which would avoid creating the race window in the first place. > > Is there a reason why the log finalization cannot be moved earlier as > Alexei suggested? > I've tried his suggestion, attached below.
However, I think there should be a simpler approach to fix it. I preferred this simple one, so I posted it. Should I follow the suggestion instead of this trade-off approach? Thanks, Leon --- >From 7ea7ab53d24b7cf34bd2775ef5c871aa312d07c6 Mon Sep 17 00:00:00 2001 From: Leon Hwang <[email protected]> Date: Tue, 19 May 2026 10:32:01 +0800 Subject: [PATCH bpf-next 2/5] bpf: Fix concurrent regression in map_create() Because there is time gap between bpf_map_new_fd() and close_fd(), a concurrent thread is able to close the new fd and opens a new, unrelated file with the exact same fd number. Thereafter, this close_fd() might inadvertently close the unrelated file. To avoid such regression, add bpf_log_attr_finalize() before security_bpf_map_create() to report log_true_size on check-success path. While keep the original bpf_log_attr_finalize() and avoid finalizing bpf_log_attr twice, guard the finalization using attr_log->finalized. Fixes: 49f9b2b2a18c ("bpf: Add syscall common attributes support for map_create") Signed-off-by: Leon Hwang <[email protected]> --- include/linux/bpf_verifier.h | 1 + kernel/bpf/log.c | 4 ++++ kernel/bpf/syscall.c | 21 ++++++++++++++------- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 20c421b43849..a10ef58bb6ea 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -788,6 +788,7 @@ struct bpf_log_attr { u32 level; u32 offsetof_true_size; bpfptr_t uattr; + bool finalized; }; int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 62fe6ed18374..71e3035515cd 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -894,6 +894,10 @@ int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *lo u32 log_true_size; int err; + if (attr->finalized) + return 0; + attr->finalized = true; + err = bpf_vlog_finalize(log, &log_true_size); if (attr->offsetof_true_size && diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 83de8fb9b9aa..c117e2286cf0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1359,7 +1359,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size /* called via syscall */ -static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log) +static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log, + struct bpf_log_attr *attr_log) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; @@ -1598,6 +1599,10 @@ static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifie goto free_map; } + err = bpf_log_attr_finalize(attr_log, log); + if (err) + goto free_map; + err = security_bpf_map_create(map, attr, token, uattr.is_kernel); if (err) goto free_map_sec; @@ -1643,15 +1648,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_at if (IS_ERR(log)) return PTR_ERR(log); - err = __map_create(attr, uattr, log); + err = __map_create(attr, uattr, log, &attr_log); - /* preserve original error even if log finalization is successful */ + /* preserve original error even if log finalization is + successful */ + /* + * No duplicated finalization here because of attr_log->finalized + * guard in bpf_log_attr_finalize(). + */ ret = bpf_log_attr_finalize(&attr_log, log); - if (ret) { - if (err >= 0) - close_fd(err); + if (ret) err = ret; - } kfree(log); return err; -- 2.54.0

