Currently, we use bucket_lock when traversing bpf_sk_storage_map
elements. Since bpf_iter programs cannot use the bpf_sk_storage_get()
and bpf_sk_storage_delete() helpers, which may also grab the bucket
lock, we do not have the deadlock issue that exists for the hashmap
iterator when using bucket_lock ([1]).
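
For reference, the hashmap iterator deadlock avoided here has roughly
the following shape (an illustrative sketch with simplified names,
e.g., bpf_prog_run(); not the actual kernel code from [1]):

  raw_spin_lock_bh(&b->lock);   /* iterator holds the bucket lock */
  bpf_prog_run(prog, ctx);      /* the bpf_iter program runs ... */
          /* ... and a storage helper called by the program tries
           * to grab the same bucket lock, deadlocking the CPU:
           */
          raw_spin_lock_bh(&b->lock);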

If a bucket contains a lot of sockets, then while bpf_iter is
traversing that bucket, concurrent bpf_sk_storage_{get,delete}()
calls may experience undesirable delays. Using rcu_read_lock()
instead is a reasonable compromise here. Although the iterator may
lose some precision, e.g., access stale sockets, it will not hurt
the performance of other bpf programs. Accordingly, the bucket list
is traversed with hlist_for_each_entry_rcu(), and the now-unused
locals in bpf_sk_storage_map_seq_stop() are removed.
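
The traversal pattern therefore changes roughly as follows (a
simplified sketch of the diff below):

  rcu_read_lock();              /* was: raw_spin_lock_bh(&b->lock); */
  hlist_for_each_entry_rcu(selem, &b->list, map_node) {
          /* may observe elements whose sockets are concurrently
           * being deleted, i.e., stale sockets
           */
          ...
  }
  rcu_read_unlock();            /* was: raw_spin_unlock_bh(&b->lock); */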

[1] https://lore.kernel.org/bpf/20200902235341.2001534-1-...@fb.com

Cc: Martin KaFai Lau <ka...@fb.com>
Signed-off-by: Yonghong Song <y...@fb.com>
---
 net/core/bpf_sk_storage.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 4a86ea34f29e..a1db5e988d19 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -701,7 +701,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
                if (!selem) {
                        /* not found, unlock and go to the next bucket */
                        b = &smap->buckets[bucket_id++];
-                       raw_spin_unlock_bh(&b->lock);
+                       rcu_read_unlock();
                        skip_elems = 0;
                        break;
                }
@@ -715,7 +715,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
 
        for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
                b = &smap->buckets[i];
-               raw_spin_lock_bh(&b->lock);
+               rcu_read_lock();
                count = 0;
-               hlist_for_each_entry(selem, &b->list, map_node) {
+               hlist_for_each_entry_rcu(selem, &b->list, map_node) {
                        sk_storage = rcu_dereference_raw(selem->local_storage);
@@ -726,7 +726,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
                        }
                        count++;
                }
-               raw_spin_unlock_bh(&b->lock);
+               rcu_read_unlock();
                skip_elems = 0;
        }
 
@@ -804,15 +804,8 @@ static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
 {
-       struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
-       struct bpf_local_storage_map *smap;
-       struct bpf_local_storage_map_bucket *b;
-
-       if (!v) {
+       if (!v)
                (void)__bpf_sk_storage_map_seq_show(seq, v);
-       } else {
-               smap = (struct bpf_local_storage_map *)info->map;
-               b = &smap->buckets[info->bucket_id];
-               raw_spin_unlock_bh(&b->lock);
-       }
+       else
+               rcu_read_unlock();
 }
 
 static int bpf_iter_init_sk_storage_map(void *priv_data,
-- 
2.24.1
