Hi all,
On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <[email protected]> wrote:
>
> On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <[email protected]> wrote:
> >
> > Changed eBPF map updates through mmaped array.
> > Mmaped arrays provide direct access to map data.
> > It should omit using bpf_map_update_elem() call,
> > which may require capabilities that are not present.
> >
> > Signed-off-by: Andrew Melnychenko <[email protected]>
> > ---
> > ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > ebpf/ebpf_rss.h | 5 +++
> > 2 files changed, 99 insertions(+), 23 deletions(-)
> >
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index cee658c158b..247f5eee1b6 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > {
> > if (ctx != NULL) {
> > ctx->obj = NULL;
> > + ctx->program_fd = -1;
> > + ctx->map_configuration = -1;
> > + ctx->map_toeplitz_key = -1;
> > + ctx->map_indirections_table = -1;
> > +
> > + ctx->mmap_configuration = NULL;
> > + ctx->mmap_toeplitz_key = NULL;
> > + ctx->mmap_indirections_table = NULL;
> > }
> > }
> >
> > bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > {
> > - return ctx != NULL && ctx->obj != NULL;
> > + return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > +}
> > +
> > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > +{
> > + if (!ebpf_rss_is_loaded(ctx)) {
> > + return false;
> > + }
> > +
> > + ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > + PROT_READ | PROT_WRITE, MAP_SHARED,
> > + ctx->map_configuration, 0);
> > + if (ctx->mmap_configuration == MAP_FAILED) {
> > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration
> > array");
> > + return false;
> > + }
> > + ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > + PROT_READ | PROT_WRITE, MAP_SHARED,
> > + ctx->map_toeplitz_key, 0);
> > + if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > + goto toeplitz_fail;
> > + }
> > + ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > + PROT_READ | PROT_WRITE, MAP_SHARED,
> > + ctx->map_indirections_table, 0);
> > + if (ctx->mmap_indirections_table == MAP_FAILED) {
> > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection
> > table");
> > + goto indirection_fail;
> > + }
> > +
> > + return true;
> > +
> > +indirection_fail:
> > + munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +toeplitz_fail:
> > + munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > + ctx->mmap_configuration = NULL;
> > + ctx->mmap_toeplitz_key = NULL;
> > + ctx->mmap_indirections_table = NULL;
> > + return false;
> > +}
> > +
> > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > +{
> > + if (!ebpf_rss_is_loaded(ctx)) {
> > + return;
> > + }
> > +
> > + munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > + munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > + munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > + ctx->mmap_configuration = NULL;
> > + ctx->mmap_toeplitz_key = NULL;
> > + ctx->mmap_indirections_table = NULL;
> > }
> >
> > bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > {
> > struct rss_bpf *rss_bpf_ctx;
> >
> > - if (ctx == NULL) {
> > + if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > return false;
> > }
> >
> > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > ctx->map_toeplitz_key = bpf_map__fd(
> > rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> >
> > + if (!ebpf_rss_mmap(ctx)) {
> > + goto error;
> > + }
> > +
> > return true;
> > error:
> > rss_bpf__destroy(rss_bpf_ctx);
> > ctx->obj = NULL;
> > + ctx->program_fd = -1;
> > + ctx->map_configuration = -1;
> > + ctx->map_toeplitz_key = -1;
> > + ctx->map_indirections_table = -1;
> >
> > return false;
> > }
> > @@ -77,15 +149,11 @@ error:
> > static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > struct EBPFRSSConfig *config)
> > {
> > - uint32_t map_key = 0;
> > -
> > if (!ebpf_rss_is_loaded(ctx)) {
> > return false;
> > }
> > - if (bpf_map_update_elem(ctx->map_configuration,
> > - &map_key, config, 0) < 0) {
> > - return false;
> > - }
> > +
> > + memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > return true;
> > }
> >
> > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct
> > EBPFRSSContext *ctx,
> > uint16_t *indirections_table,
> > size_t len)
> > {
> > - uint32_t i = 0;
> > -
> > if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > return false;
> > }
> >
> > - for (; i < len; ++i) {
> > - if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > - indirections_table + i, 0) < 0) {
> > - return false;
> > - }
> > - }
> > + memcpy(ctx->mmap_indirections_table, indirections_table,
> > + sizeof(*indirections_table) * len);
>
> As discussed, should we stick the compatibility on the host without
> bpf mmap support?
>
> If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> yes, we should track if the map is mmaped and switch between memcpy
> and syscall.
>
> Thanks
I've run some tests.
I checked the eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
1.0.1, 1.1.0, and the latest, 1.2.0.
Overall, the eBPF program requires an explicit declaration of
BPF_F_MMAPABLE in map_flags.
Without this flag, the program can be loaded on every tested
kernel/libbpf configuration, but QEMU can't mmap() the eBPF map
fds (obviously).
The alternative to mmap() is the bpf_map_update_elem() syscall/method,
which would require capabilities for QEMU.
With this flag, kernel 5.4 + libbpf can't load the eBPF object.
So, compatibility would require two different eBPF objects or some kind
of hack; it would also require an additional capability for QEMU.
I don't think that we need checks for disabling eBPF RSS. It wouldn't
break anything on the old kernel and, after an update, it should work
OK.
>
> > return true;
> > }
> >
> > static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > uint8_t *toeplitz_key)
> > {
> > - uint32_t map_key = 0;
> > -
> > /* prepare toeplitz key */
> > uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> >
> > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct
> > EBPFRSSContext *ctx,
> > memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> >
> > - if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > - 0) < 0) {
> > - return false;
> > - }
> > + memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > return true;
> > }
> >
> > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > return;
> > }
> >
> > - rss_bpf__destroy(ctx->obj);
> > + ebpf_rss_munmap(ctx);
> > +
> > + if (ctx->obj) {
> > + rss_bpf__destroy(ctx->obj);
> > + } else {
> > + close(ctx->program_fd);
> > + close(ctx->map_configuration);
> > + close(ctx->map_toeplitz_key);
> > + close(ctx->map_indirections_table);
> > + }
> > +
> > ctx->obj = NULL;
> > + ctx->program_fd = -1;
> > + ctx->map_configuration = -1;
> > + ctx->map_toeplitz_key = -1;
> > + ctx->map_indirections_table = -1;
> > }
> > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > index bf3f2572c7c..ab08a7266d0 100644
> > --- a/ebpf/ebpf_rss.h
> > +++ b/ebpf/ebpf_rss.h
> > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > int map_configuration;
> > int map_toeplitz_key;
> > int map_indirections_table;
> > +
> > + /* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
> > + void *mmap_configuration;
> > + void *mmap_toeplitz_key;
> > + void *mmap_indirections_table;
> > };
> >
> > struct EBPFRSSConfig {
> > --
> > 2.41.0
> >
>