From: Kaitao Cheng <[email protected]>

The stale-owner problem described below only becomes reachable once
bpf_list_del() is available: callers can pass an arbitrary bpf_list_head
and bpf_list_node pair, including nodes that are not actually linked to
the supplied head, or nodes that outlive their original head because a
program still holds a reference to them.  Callers restricted to the
pop-style helpers could not practically reach this case; bpf_list_del()
widens the API surface.

A failure mode appears when bpf_list_head_free() runs while a program
still holds an independent refcount on a node (for example via
bpf_refcount_acquire()).  The list head value embedded in map memory can
go away while the node object survives.  If node->owner is left pointing
at the old head address until drop completes, that pointer becomes stale.
If a new bpf_list_head is later allocated at the same address and the
stale node is passed to bpf_list_del(), the owner comparison can succeed
even though the node is not really linked to the new head, and
list_del_init() will follow bogus next/prev pointers with the risk of
memory corruption.

When draining a bpf_list_head, set each node's owner to BPF_PTR_POISON
under the map spinlock while moving the node to a private drain list.
Then, after dropping the lock, list_del_init() each node and clear its
owner to NULL before calling __bpf_obj_drop_impl().  Concurrent readers
therefore never observe a node that appears linked to a head while its
list_head is inconsistent, and surviving refcounted nodes never retain a
stale non-NULL owner.

Signed-off-by: Kaitao Cheng <[email protected]>
---
 kernel/bpf/helpers.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 094457c3e6d3..59855b434f0b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2247,10 +2247,11 @@ EXPORT_SYMBOL_GPL(bpf_base_func_proto);
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
                        struct bpf_spin_lock *spin_lock)
 {
-       struct list_head *head = list_head, *orig_head = list_head;
+       struct list_head *head = list_head, drain, *pos, *n;
 
        BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
        BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
+       INIT_LIST_HEAD(&drain);
 
        /* Do the actual list draining outside the lock to not hold the lock for
         * too long, and also prevent deadlocks if tracing programs end up
@@ -2261,20 +2262,30 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
        __bpf_spin_lock_irqsave(spin_lock);
        if (!head->next || list_empty(head))
                goto unlock;
-       head = head->next;
+       list_for_each_safe(pos, n, head) {
+               struct bpf_list_node_kern *node;
+
+               node = container_of(pos, struct bpf_list_node_kern, list_head);
+               WRITE_ONCE(node->owner, BPF_PTR_POISON);
+               list_move_tail(pos, &drain);
+       }
 unlock:
-       INIT_LIST_HEAD(orig_head);
+       INIT_LIST_HEAD(head);
        __bpf_spin_unlock_irqrestore(spin_lock);
 
-       while (head != orig_head) {
-               void *obj = head;
+       while (!list_empty(&drain)) {
+               struct bpf_list_node_kern *node;
 
-               obj -= field->graph_root.node_offset;
-               head = head->next;
+               pos = drain.next;
+               node = container_of(pos, struct bpf_list_node_kern, list_head);
+               list_del_init(pos);
+               /* Ensure __bpf_list_add() sees the node as unlinked. */
+               smp_store_release(&node->owner, NULL);
                /* The contained type can also have resources, including a
                 * bpf_list_head which needs to be freed.
                 */
-               __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
+               __bpf_obj_drop_impl((char *)pos - field->graph_root.node_offset,
+                                   field->graph_root.value_rec, false);
        }
 }
 
-- 
2.50.1 (Apple Git-155)


Reply via email to