On 1/23/2019 4:31 PM, Jiri Pirko wrote: > Tue, Jan 22, 2019 at 04:57:18PM CET, era...@mellanox.com wrote: >> Devlink msg is a mechanism to pass descriptors between drivers and >> devlink, in json-like format. The API allows the driver to add objects, >> object pair, value array (nested attributes), value and name. >> >> Driver can use this API to fill the msg context in a format which can be >> translated by the devlink to the netlink message later. >> >> With this API, driver does not need to declare the total size per message >> context, and it dynamically add more messages to the context (bounded by >> the system memory limitation only). >> There is an implicit request to have the preliminary main objects size >> created via this API to be upper bounded by netlink SKB size, as those >> objects do not get fragmented between different SKBs when passed to the >> netlink layer. >> >> Devlink msg will replace devlink buffers for health reporters. Devlink >> buffers which will be deprecated and removed in the downstream patch. >> >> Signed-off-by: Eran Ben Elisha <era...@mellanox.com> >> CC: Wei Yongjun <weiyongj...@huawei.com> >> Reviewed-by: Moshe Shemesh <mo...@mellanox.com> >> --- >> include/net/devlink.h | 70 ++++++ >> include/uapi/linux/devlink.h | 8 + >> net/core/devlink.c | 455 +++++++++++++++++++++++++++++++++++ >> 3 files changed, 533 insertions(+) >> >> diff --git a/include/net/devlink.h b/include/net/devlink.h >> index a81a1b7a67d7..fe323e9b14e1 100644 >> --- a/include/net/devlink.h >> +++ b/include/net/devlink.h >> @@ -424,6 +424,7 @@ struct devlink_region; >> >> typedef void devlink_snapshot_data_dest_t(const void *data); >> >> +struct devlink_msg_ctx; >> struct devlink_health_buffer; >> struct devlink_health_reporter; >> >> @@ -615,6 +616,21 @@ int devlink_region_snapshot_create(struct >> devlink_region *region, u64 data_len, >> u8 *data, u32 snapshot_id, >> devlink_snapshot_data_dest_t >> *data_destructor); >> >> +int devlink_msg_nest_start(struct devlink_msg_ctx *msg_ctx, int attrtype); > > > devlink_msg_*() functions should work with struct devlink_msg. > devlink_msg_ctx*() functions should work with struct devlink_msg_ctx. > Please be consistent with the naming. > > I think you can call this just "struct devlink_msg" of maybe "fmsg" as > for "formatted message" - so it is not confused with any other devlink > netlink message.
Ack. > > > >> +int devlink_msg_nest_end(struct devlink_msg_ctx *msg_ctx); >> +int devlink_msg_put_value(struct devlink_msg_ctx *msg_ctx, >> + void *value, u16 value_len, u8 value_nla_type); >> +int devlink_msg_put_name(struct devlink_msg_ctx *msg_ctx, >> + char *name); >> +int devlink_msg_put_name_value_pair(struct devlink_msg_ctx *msg_ctx, >> + char *name, void *value, u16 value_len, >> + u8 value_nla_type); >> +int devlink_msg_put_name_value_array(struct devlink_msg_ctx *msg_ctx, >> + char *name, void **value, u16 *value_len, >> + u8 *value_nla_type, int array_size); >> +int devlink_msg_object_start(struct devlink_msg_ctx *msg_ctx, char *name); >> +int devlink_msg_object_end(struct devlink_msg_ctx *msg_ctx, char *name); >> + >> int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, >> int attrtype); >> void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer); >> @@ -903,6 +919,60 @@ devlink_region_snapshot_create(struct devlink_region >> *region, u64 data_len, >> return 0; >> } >> >> +static inline int >> +devlink_msg_nest_start(struct devlink_msg_ctx *msg_ctx, int attrtype) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_nest_end(struct devlink_msg_ctx *msg_ctx) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_put_value(struct devlink_msg_ctx *msg_ctx, >> + void *value, u16 value_len, u8 value_nla_type) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_put_name(struct devlink_msg_ctx *msg_ctx, >> + char *name) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_put_name_value_pair(struct devlink_msg_ctx *msg_ctx, >> + char *name, void *value, u16 value_len, >> + u8 value_nla_type) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_put_name_value_array(struct devlink_msg_ctx *msg_ctx, >> + char *name, void **value, u16 *value_len, >> + u8 *value_nla_type, int array_size) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_object_start(struct devlink_msg_ctx *msg_ctx, char *name) >> +{ >> + return 0; >> +} >> + >> +static inline int >> +devlink_msg_object_end(struct devlink_msg_ctx *msg_ctx, char *name) >> +{ >> + return 0; >> +} >> + >> static inline int >> devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, >> int attrtype) >> diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h >> index 6b26bb2ce4dc..68eeda1d0455 100644 >> --- a/include/uapi/linux/devlink.h >> +++ b/include/uapi/linux/devlink.h >> @@ -300,6 +300,14 @@ enum devlink_attr { >> DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ >> DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */ >> >> + DEVLINK_ATTR_MSG_OBJECT, /* nested */ >> + DEVLINK_ATTR_MSG_OBJECT_PAIR, /* nested */ >> + DEVLINK_ATTR_MSG_OBJECT_NAME, /* string */ >> + DEVLINK_ATTR_MSG_OBJECT_VALUE, /* nested */ >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_ARRAY, /* nested */ >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, /* u8 */ >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, /* dynamic */ >> + >> DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ >> DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ >> DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ >> diff --git a/net/core/devlink.c b/net/core/devlink.c >> index 60248a53c0ad..57ca096849b3 100644 >> --- a/net/core/devlink.c >> +++ b/net/core/devlink.c >> @@ -4098,6 +4098,461 @@ devlink_health_buffer_snd(struct genl_info *info, >> return err; >> } >> >> +struct devlink_msg { > > Let's call this "struct devlink_msg_item" Ack. > > >> + struct list_head list; >> + int attrtype; >> + u8 nla_type; >> + u16 len; >> + int value[0]; >> +}; >> + >> +struct devlink_msg_ctx { >> + struct list_head msg_list; >> + int max_nested_depth; >> + int curr_nest; >> +}; >> + >> +#define DEVLINK_MSG_MAX_SIZE (4096 - GENL_HDRLEN) >> + >> +static struct devlink_msg_ctx *devlink_msg_ctx_alloc(void) >> +{ >> + struct devlink_msg_ctx *msg_ctx; >> + >> + msg_ctx = kzalloc(sizeof(*msg_ctx), GFP_KERNEL); >> + if (!msg_ctx) >> + return ERR_PTR(-ENOMEM); >> + >> + INIT_LIST_HEAD(&msg_ctx->msg_list); >> + >> + return msg_ctx; >> +} >> + >> +static void devlink_msg_ctx_free(struct devlink_msg_ctx *msg_ctx) >> +{ >> + struct devlink_msg *msg, *tm; >> + >> + list_for_each_entry_safe(msg, tm, &msg_ctx->msg_list, list) { >> + list_del(&msg->list); >> + kfree(msg); >> + } >> + kfree(msg_ctx); >> +} >> + >> +int devlink_msg_nest_start(struct devlink_msg_ctx *msg_ctx, int attrtype) >> +{ >> + struct devlink_msg *nest_msg; >> + >> + if (attrtype != DEVLINK_ATTR_MSG_OBJECT && >> + attrtype != DEVLINK_ATTR_MSG_OBJECT_PAIR && >> + attrtype != DEVLINK_ATTR_MSG_OBJECT_VALUE && >> + attrtype != DEVLINK_ATTR_MSG_OBJECT_VALUE_ARRAY) >> + return -EINVAL; >> + >> + nest_msg = kzalloc(sizeof(*nest_msg), GFP_KERNEL); >> + if (!nest_msg) >> + return -ENOMEM; >> + >> + nest_msg->attrtype = attrtype; >> + msg_ctx->curr_nest++; >> + msg_ctx->max_nested_depth = max(msg_ctx->max_nested_depth, >> + msg_ctx->curr_nest); >> + list_add_tail(&nest_msg->list, &msg_ctx->msg_list); >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_nest_start); >> + >> +int devlink_msg_nest_end(struct devlink_msg_ctx *msg_ctx) >> +{ >> + struct devlink_msg *nest_msg; >> + >> + WARN_ON(!msg_ctx->curr_nest); >> + nest_msg = kzalloc(sizeof(*nest_msg), GFP_KERNEL); >> + if (!nest_msg) >> + return -ENOMEM; >> + >> + msg_ctx->curr_nest--; >> + list_add_tail(&nest_msg->list, &msg_ctx->msg_list); >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_nest_end); >> + >> +int devlink_msg_put_value(struct devlink_msg_ctx *msg_ctx, >> + void *value, u16 value_len, u8 value_nla_type) >> +{ >> + struct devlink_msg *value_msg; >> + >> + if (value_len > DEVLINK_MSG_MAX_SIZE) >> + return -EMSGSIZE; >> + >> + if (value_nla_type != NLA_U8 && >> + value_nla_type != NLA_U32 && >> + value_nla_type != NLA_U64 && >> + value_nla_type != NLA_NUL_STRING && >> + value_nla_type != NLA_BINARY) >> + return -EINVAL; >> + >> + value_msg = kzalloc(sizeof(*value_msg) + value_len, GFP_KERNEL); > > Looks fine. > > >> + if (!value_msg) >> + return -ENOMEM; >> + >> + value_msg->nla_type = value_nla_type; >> + value_msg->len = value_len; >> + value_msg->attrtype = DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA; >> + memcpy(&value_msg->value, value, value_len); >> + list_add_tail(&value_msg->list, &msg_ctx->msg_list); >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_put_value); >> + >> +int devlink_msg_put_name(struct devlink_msg_ctx *msg_ctx, char *name) >> +{ >> + struct devlink_msg *name_msg; >> + >> + if (strlen(name) + 1 > DEVLINK_MSG_MAX_SIZE) >> + return -EMSGSIZE; >> + >> + name_msg = kzalloc(sizeof(*name_msg) + strlen(name) + 1, GFP_KERNEL); >> + if (!name_msg) >> + return -ENOMEM; >> + >> + name_msg->nla_type = NLA_NUL_STRING; >> + name_msg->len = strlen(name) + 1; >> + name_msg->attrtype = DEVLINK_ATTR_MSG_OBJECT_NAME; >> + memcpy(&name_msg->value, name, name_msg->len); >> + list_add_tail(&name_msg->list, &msg_ctx->msg_list); >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_put_name); >> + >> +int devlink_msg_put_name_value_pair(struct devlink_msg_ctx *msg_ctx, >> + char *name, void *value, u16 value_len, >> + u8 value_nla_type) >> +{ >> + int err; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_PAIR); >> + if (err) >> + return err; >> + >> + err = devlink_msg_put_name(msg_ctx, name); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_VALUE); >> + if (err) >> + return err; >> + >> + err = devlink_msg_put_value(msg_ctx, value, value_len, value_nla_type); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_put_name_value_pair); >> + >> +int devlink_msg_put_name_value_array(struct devlink_msg_ctx *msg_ctx, >> + char *name, void **value, u16 *value_len, >> + u8 *value_nla_type, int array_size) > > This is limitting the arrays to plain values. One should be able to nest > objects inside. If fact, that is what you can to do with sqs: > > object start > name sqs > array start > object start > index 0 > xxx yyy > object end > object start > index 1 > xxx yyy > object end > array end > object end > > So you need to have: > devlink_msg_put_array_start() > devlink_msg_put_array_end() Sounds good, will do. > > >> +{ >> + int err, i; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_PAIR); >> + if (err) >> + return err; >> + >> + err = devlink_msg_put_name(msg_ctx, name); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_VALUE); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_start(msg_ctx, >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_ARRAY); >> + if (err) >> + return err; >> + >> + for (i = 0; i < array_size; i++) { >> + err = devlink_msg_nest_start(msg_ctx, >> + DEVLINK_ATTR_MSG_OBJECT_VALUE); >> + if (err) >> + return err; >> + >> + err = devlink_msg_put_value(msg_ctx, value[i], >> + value_len[i], value_nla_type[i]); >> + if (err) >> + return err; >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + } >> + >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_end(msg_ctx); >> + if (err) >> + return err; >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_put_name_value_array); >> + >> +int devlink_msg_object_start(struct devlink_msg_ctx *msg_ctx, char *name) >> +{ >> + int err; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT); >> + if (err) >> + return err; >> + >> + if (!name) >> + return 0; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_PAIR); >> + if (err) >> + return err; >> + >> + err = devlink_msg_put_name(msg_ctx, name); >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_start(msg_ctx, DEVLINK_ATTR_MSG_OBJECT_VALUE); >> + if (err) >> + return err; >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_object_start); >> + >> +int devlink_msg_object_end(struct devlink_msg_ctx *msg_ctx, char *name) >> +{ >> + int err; >> + >> + if (!name) >> + goto end_object; >> + >> + err = devlink_msg_nest_end(msg_ctx); /* DEVLINK_ATTR_MSG_OBJECT_VALUE */ >> + if (err) >> + return err; >> + >> + err = devlink_msg_nest_end(msg_ctx); /* DEVLINK_ATTR_MSG_OBJECT_PAIR */ >> + if (err) >> + return err; >> + >> +end_object: >> + err = devlink_msg_nest_end(msg_ctx); /* DEVLINK_ATTR_MSG_OBJECT */ >> + if (err) >> + return err; >> + >> + return 0; >> +} >> +EXPORT_SYMBOL_GPL(devlink_msg_object_end); >> + >> +static int >> +devlink_msg_fill_data(struct sk_buff *skb, struct devlink_msg *msg) >> +{ >> + int err; >> + >> + switch (msg->nla_type) { >> + case NLA_U8: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, >> + *(u8 *)msg->value); >> + break; >> + case NLA_U32: >> + err = nla_put_u32(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, >> + *(u32 *)msg->value); >> + break; >> + case NLA_U64: >> + err = nla_put_u64_64bit(skb, >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, >> + *(u64 *)msg->value, DEVLINK_ATTR_PAD); >> + break; >> + case NLA_NUL_STRING: >> + err = nla_put_string(skb, >> + DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, >> + (char *)&msg->value); >> + break; >> + case NLA_BINARY: >> + err = nla_put(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA, >> + msg->len, (void *)&msg->value); >> + break; >> + default: >> + err = -EINVAL; >> + break; >> + } >> + >> + return err; >> +} >> + >> +static int >> +devlink_msg_fill_type(struct sk_buff *skb, struct devlink_msg *msg) >> +{ >> + int err; >> + >> + switch (msg->nla_type) { >> + case NLA_U8: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, >> + NLA_U8); >> + break; >> + case NLA_U32: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, >> + NLA_U32); >> + break; >> + case NLA_U64: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, >> + NLA_U64); >> + break; >> + case NLA_NUL_STRING: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, >> + NLA_NUL_STRING); >> + break; >> + case NLA_BINARY: >> + err = nla_put_u8(skb, DEVLINK_ATTR_MSG_OBJECT_VALUE_TYPE, >> + NLA_BINARY); >> + break; >> + default: >> + err = -EINVAL; >> + break; >> + } >> + >> + return err; >> +} >> + >> +static int >> +devlink_msg_prepare_skb(struct sk_buff *skb, struct devlink_msg_ctx >> *msg_ctx, >> + int *start) >> +{ >> + struct nlattr **nlattr_arr; >> + struct devlink_msg *msg; >> + int j = 0, i = 0; >> + int err; >> + >> + nlattr_arr = kcalloc(msg_ctx->max_nested_depth, >> + sizeof(*nlattr_arr), GFP_KERNEL); >> + if (!nlattr_arr) >> + return -EINVAL; >> + >> + list_for_each_entry(msg, &msg_ctx->msg_list, list) { >> + if (j < *start) { >> + j++; >> + continue; >> + } >> + >> + switch (msg->attrtype) { >> + case DEVLINK_ATTR_MSG_OBJECT: >> + case DEVLINK_ATTR_MSG_OBJECT_PAIR: >> + case DEVLINK_ATTR_MSG_OBJECT_VALUE: >> + case DEVLINK_ATTR_MSG_OBJECT_VALUE_ARRAY: >> + nlattr_arr[i] = nla_nest_start(skb, msg->attrtype); >> + if (!nlattr_arr[i]) { >> + err = -EMSGSIZE; >> + goto nla_put_failure; >> + } >> + i++; >> + break; >> + case DEVLINK_ATTR_MSG_OBJECT_VALUE_DATA: >> + err = devlink_msg_fill_data(skb, msg); >> + if (err) >> + goto nla_put_failure; >> + err = devlink_msg_fill_type(skb, msg); >> + if (err) >> + goto nla_put_failure; >> + break; >> + case DEVLINK_ATTR_MSG_OBJECT_NAME: >> + err = nla_put_string(skb, msg->attrtype, >> + (char *)&msg->value); >> + if (err) >> + goto nla_put_failure; >> + break; >> + default: /* No attrtype */ >> + nla_nest_end(skb, nlattr_arr[--i]); >> + break; >> + } >> + j++; >> + if (!i) >> + *start = j; >> + } >> + >> + kfree(nlattr_arr); >> + return 0; >> + >> +nla_put_failure: >> + for (j = 0; j < i; j++) >> + nla_nest_cancel(skb, nlattr_arr[j]); >> + kfree(nlattr_arr); >> + return err; >> +} >> + >> +static int devlink_msg_snd(struct genl_info *info, >> + enum devlink_command cmd, int flags, >> + struct devlink_msg_ctx *msg_ctx) >> +{ >> + struct sk_buff *skb; >> + struct nlmsghdr *nlh; >> + int err, index = 0; >> + bool last = false; >> + void *hdr; >> + >> + while (!last) { >> + int tmp_index = index; >> + >> + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); >> + if (!skb) >> + return -ENOMEM; >> + >> + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, >> + &devlink_nl_family, NLM_F_MULTI, cmd); >> + if (!hdr) >> + goto nla_put_failure; >> + >> + err = devlink_msg_prepare_skb(skb, msg_ctx, &index); >> + if (!err) >> + last = true; >> + else if (err != -EMSGSIZE || tmp_index == index) >> + goto nla_put_failure; >> + >> + genlmsg_end(skb, hdr); >> + err = genlmsg_reply(skb, info); >> + if (err) >> + return err; > > > Looks fine. > > >> + } >> + >> + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); >> + if (!skb) >> + return -ENOMEM; >> + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, >> + NLMSG_DONE, 0, flags | NLM_F_MULTI); >> + err = genlmsg_reply(skb, info); >> + if (err) >> + return err; >> + >> + return 0; >> + >> +nla_put_failure: >> + err = -EIO; >> + nlmsg_free(skb); >> + return err; >> +} >> + >> struct devlink_health_reporter { >> struct list_head list; >> struct devlink_health_buffer **dump_buffers_array; >> -- >> 2.17.1 >>