On 18/5/26 23:40, [email protected] wrote:
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 83de8fb9b9aa..322865a88b3a 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -1647,11 +1647,18 @@ static int map_create(union bpf_attr *attr, bpfptr_t 
>> uattr, struct bpf_common_at
>>
>>      /* preserve original error even if log finalization is successful */
>>      ret = bpf_log_attr_finalize(&attr_log, log);
>> -    if (ret) {
>> -            if (err >= 0)
>> -                    close_fd(err);
>> +    if (ret && err < 0)
>> +            /*
>> +             * Failed to finalize the log.
>> +             * Should not close_fd(err) here. Since the bpf_map_new_fd()
>> +             * has published the map fd, if a concurrent thread closes the
>> +             * fd, then opens new, unrelated file that receives the exact
>> +             * same fd number, close_fd(err) might inadvertently close the
>> +             * unrelated file.
>> +             * As a trade-off, override the err only when failed to finalize
>> +             * the log and failed to create map.
>> +             */
>>              err = ret;
> 
> In the v13 review thread, Alexei Starovoitov suggested moving
> bpf_log_attr_finalize() earlier in the flow to eliminate the race
> window entirely:
> 
>   "bot is correct. Let's avoid these races. Pls move
>    bpf_log_attr_finalize() into map_create and do it before
>    security_bpf_map_create."
> 
>   
> https://lore.kernel.org/bpf/CAADnVQ+XR3kyqizGgGhtG5xiBu3oK3O+COUMfPDQDBSJbm=5...@mail.gmail.com/
> 
> Does the current approach fully address the race concern? While removing
> close_fd() prevents the specific file descriptor hijacking issue, the
> suggestion was to finalize the log before security_bpf_map_create(),
> which would avoid creating the race window in the first place.
> 
> Is there a reason why the log finalization cannot be moved earlier as
> Alexei suggested?
> 
I've tried his suggestion, attached below.

However, I think there should be a simpler approach to fix it.

I preferred this simple one, so I posted it.

Should I follow the suggestion instead of this trade-off approach?

Thanks,
Leon

---

>From 7ea7ab53d24b7cf34bd2775ef5c871aa312d07c6 Mon Sep 17 00:00:00 2001
From: Leon Hwang <[email protected]>
Date: Tue, 19 May 2026 10:32:01 +0800
Subject: [PATCH bpf-next 2/5] bpf: Fix concurrent regression in map_create()

Because there is time gap between bpf_map_new_fd() and close_fd(), a
concurrent thread is able to close the new fd and opens a new, unrelated
file with the exact same fd number. Thereafter, this close_fd() might
inadvertently close the unrelated file.

To avoid such regression, add bpf_log_attr_finalize() before
security_bpf_map_create() to report log_true_size on check-success path.

While keep the original bpf_log_attr_finalize() and avoid finalizing
bpf_log_attr twice, guard the finalization using attr_log->finalized.

Fixes: 49f9b2b2a18c ("bpf: Add syscall common attributes support for
map_create")
Signed-off-by: Leon Hwang <[email protected]>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/log.c             |  4 ++++
 kernel/bpf/syscall.c         | 21 ++++++++++++++-------
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 20c421b43849..a10ef58bb6ea 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -788,6 +788,7 @@ struct bpf_log_attr {
        u32 level;
        u32 offsetof_true_size;
        bpfptr_t uattr;
+       bool finalized;
 };

 int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32
log_size, u32 log_level,
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 62fe6ed18374..71e3035515cd 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -894,6 +894,10 @@ int bpf_log_attr_finalize(struct bpf_log_attr
*attr, struct bpf_verifier_log *lo
        u32 log_true_size;
        int err;

+       if (attr->finalized)
+               return 0;
+       attr->finalized = true;
+
        err = bpf_vlog_finalize(log, &log_true_size);

        if (attr->offsetof_true_size &&
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 83de8fb9b9aa..c117e2286cf0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1359,7 +1359,8 @@ static int map_check_btf(struct bpf_map *map,
struct bpf_token *token,

 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
 /* called via syscall */
-static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct
bpf_verifier_log *log)
+static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct
bpf_verifier_log *log,
+                       struct bpf_log_attr *attr_log)
 {
        const struct bpf_map_ops *ops;
        struct bpf_token *token = NULL;
@@ -1598,6 +1599,10 @@ static int __map_create(union bpf_attr *attr,
bpfptr_t uattr, struct bpf_verifie
                goto free_map;
        }

+       err = bpf_log_attr_finalize(attr_log, log);
+       if (err)
+               goto free_map;
+
        err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
        if (err)
                goto free_map_sec;
@@ -1643,15 +1648,17 @@ static int map_create(union bpf_attr *attr,
bpfptr_t uattr, struct bpf_common_at
        if (IS_ERR(log))
                return PTR_ERR(log);

-       err = __map_create(attr, uattr, log);
+       err = __map_create(attr, uattr, log, &attr_log);

-       /* preserve original error even if log finalization is successful */
+       /* preserve original error even if log finalization is
+       successful */
+       /*
+        * No duplicated finalization here because of attr_log->finalized
+        * guard in bpf_log_attr_finalize().
+        */
        ret = bpf_log_attr_finalize(&attr_log, log);
-       if (ret) {
-               if (err >= 0)
-                       close_fd(err);
+       if (ret)
                err = ret;
-       }

        kfree(log);
        return err;
-- 
2.54.0



Reply via email to