The cache_idx is currently picked round-robin (RR).  There is a chance
that the same cache_idx will be picked by multiple sk_storage_maps while
other cache_idx values are still unused.  E.g. this could happen when
the sk_storage_map is recreated during a restart of the user space
process.

This patch tracks the usage count for each cache_idx.  There are
16 of them now (defined by BPF_SK_STORAGE_CACHE_SIZE).
It will try to pick a free cache_idx.  If none is found,
it will pick one with the minimal usage count.

Signed-off-by: Martin KaFai Lau <ka...@fb.com>
---
 net/core/bpf_sk_storage.c | 41 +++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d2c4d16dadba..1dae4b543243 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -11,8 +11,6 @@
 #include <uapi/linux/sock_diag.h>
 #include <uapi/linux/btf.h>
 
-static atomic_t cache_idx;
-
 #define SK_STORAGE_CREATE_FLAG_MASK                                    \
        (BPF_F_NO_PREALLOC | BPF_F_CLONE)
 
@@ -81,6 +79,9 @@ struct bpf_sk_storage_elem {
 #define SDATA(_SELEM) (&(_SELEM)->sdata)
 #define BPF_SK_STORAGE_CACHE_SIZE      16
 
+static DEFINE_SPINLOCK(cache_idx_lock);
+static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
+
 struct bpf_sk_storage {
        struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
        struct hlist_head list; /* List of bpf_sk_storage_elem */
@@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
        return 0;
 }
 
+static u16 cache_idx_get(void)
+{
+       u64 min_usage = U64_MAX;
+       u16 i, res = 0;
+
+       spin_lock(&cache_idx_lock);
+
+       for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
+               if (cache_idx_usage_counts[i] < min_usage) {
+                       min_usage = cache_idx_usage_counts[i];
+                       res = i;
+
+                       /* Found a free cache_idx */
+                       if (!min_usage)
+                               break;
+               }
+       }
+       cache_idx_usage_counts[res]++;
+
+       spin_unlock(&cache_idx_lock);
+
+       return res;
+}
+
+static void cache_idx_free(u16 idx)
+{
+       spin_lock(&cache_idx_lock);
+       cache_idx_usage_counts[idx]--;
+       spin_unlock(&cache_idx_lock);
+}
+
 /* Called by __sk_destruct() & bpf_sk_storage_clone() */
 void bpf_sk_storage_free(struct sock *sk)
 {
@@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
        smap = (struct bpf_sk_storage_map *)map;
 
+       cache_idx_free(smap->cache_idx);
+
        /* Note that this map might be concurrently cloned from
         * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
         * RCU read section to finish before proceeding. New RCU
@@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
        }
 
        smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
-       smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
-               BPF_SK_STORAGE_CACHE_SIZE;
+       smap->cache_idx = cache_idx_get();
 
        return &smap->map;
 }
-- 
2.24.1

Reply via email to