> -----Original Message----- > From: Marcelo Ricardo Leitner [mailto:marcelo.leit...@gmail.com] > Sent: Wednesday, January 10, 2018 3:14 AM > To: Chris Mi <chr...@mellanox.com> > Cc: netdev@vger.kernel.org; gerlitz...@gmail.com; > step...@networkplumber.org; dsah...@gmail.com; p...@nwl.cc > Subject: Re: [patch iproute2 v7 2/2] tc: Add batchsize feature for filter and > actions > > On Tue, Jan 09, 2018 at 03:59:08PM +0900, Chris Mi wrote: > > Currently in tc batch mode, only one command is read from the batch > > file and sent to kernel to process. With this support, at most 128 > > commands can be accumulated before sending to kernel. > > > > Now it only works for the following successive commands: > > filter and actions add/delete/change/replace. > > > > Signed-off-by: Chris Mi <chr...@mellanox.com> > > --- > > tc/m_action.c | 60 +++++++++++++++++---------- > > tc/tc.c | 127 > ++++++++++++++++++++++++++++++++++++++++++++++++++------- > > tc/tc_common.h | 5 ++- > > tc/tc_filter.c | 97 +++++++++++++++++++++++++------------------ > > 4 files changed, 210 insertions(+), 79 deletions(-) > > > > diff --git a/tc/m_action.c b/tc/m_action.c index fc422364..e5c53a80 > > 100644 > > --- a/tc/m_action.c > > +++ b/tc/m_action.c > > @@ -546,40 +546,56 @@ bad_val: > > return ret; > > } > > > > +struct tc_action_req { > > + struct nlmsghdr n; > > + struct tcamsg t; > > + char buf[MAX_MSG]; > > +}; > > + > > static int tc_action_modify(int cmd, unsigned int flags, > > - int *argc_p, char ***argv_p) > > + int *argc_p, char ***argv_p, > > + void *buf) > > { > > - int argc = *argc_p; > > + struct tc_action_req *req, action_req; > > char **argv = *argv_p; > > + struct rtattr *tail; > > + int argc = *argc_p; > > + struct iovec iov; > > int ret = 0; > > - struct { > > - struct nlmsghdr n; > > - struct tcamsg t; > > - char buf[MAX_MSG]; > > - } req = { > > - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)), > > - .n.nlmsg_flags = NLM_F_REQUEST | flags, > > - .n.nlmsg_type = cmd, > > - .t.tca_family = AF_UNSPEC, > > - }; > > - struct rtattr *tail = NLMSG_TAIL(&req.n); > > + > > + if (buf) > > + req = buf; > > + else > > + req = &action_req; > > + > > + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)); > > + req->n.nlmsg_flags = NLM_F_REQUEST | flags; > > + req->n.nlmsg_type = cmd; > > + req->t.tca_family = AF_UNSPEC; > > + tail = NLMSG_TAIL(&req->n); > > > > argc -= 1; > > argv += 1; > > - if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) { > > + if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) { > > fprintf(stderr, "Illegal \"action\"\n"); > > return -1; > > } > > - tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail; > > + tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail; > > + > > + *argc_p = argc; > > + *argv_p = argv; > > > > - if (rtnl_talk(&rth, &req.n, NULL) < 0) { > > + iov.iov_base = &req->n; > > + iov.iov_len = req->n.nlmsg_len; > > + > > + if (buf) > > + return 0; > > + > > + if (rtnl_talk_iov(&rth, &iov, 1, NULL) < 0) { > > fprintf(stderr, "We have an error talking to the kernel\n"); > > ret = -1; > > } > > > > - *argc_p = argc; > > - *argv_p = argv; > > - > > return ret; > > } > > > > @@ -679,7 +695,7 @@ bad_val: > > return ret; > > } > > > > -int do_action(int argc, char **argv) > > +int do_action(int argc, char **argv, void *buf) > > { > > > > int ret = 0; > > @@ -689,12 +705,12 @@ int do_action(int argc, char **argv) > > if (matches(*argv, "add") == 0) { > > ret = tc_action_modify(RTM_NEWACTION, > > NLM_F_EXCL | > NLM_F_CREATE, > > - &argc, &argv); > > + &argc, &argv, buf); > > } else if (matches(*argv, "change") == 0 || > > matches(*argv, "replace") == 0) { > > ret = tc_action_modify(RTM_NEWACTION, > > NLM_F_CREATE | > NLM_F_REPLACE, > > - &argc, &argv); > > + &argc, &argv, buf); > > } else if (matches(*argv, "delete") == 0) { > > argc -= 1; > > argv += 1; > > diff --git a/tc/tc.c b/tc/tc.c > > index ad9f07e9..f32e4978 100644 > > --- a/tc/tc.c > > +++ b/tc/tc.c > > @@ -193,16 +193,16 @@ static void usage(void) > > " -nm | -nam[es] | { -cf | -conf } > > path } | - > j[son]\n"); > > } > > > > -static int do_cmd(int argc, char **argv) > > +static int do_cmd(int argc, char **argv, void *buf) > > { > > if (matches(*argv, "qdisc") == 0) > > return do_qdisc(argc-1, argv+1); > > if (matches(*argv, "class") == 0) > > return do_class(argc-1, argv+1); > > if (matches(*argv, "filter") == 0) > > - return do_filter(argc-1, argv+1); > > + return do_filter(argc-1, argv+1, buf); > > if (matches(*argv, "actions") == 0) > > - return do_action(argc-1, argv+1); > > + return do_action(argc-1, argv+1, buf); > > if (matches(*argv, "monitor") == 0) > > return do_tcmonitor(argc-1, argv+1); > > if (matches(*argv, "exec") == 0) > > @@ -217,11 +217,39 @@ static int do_cmd(int argc, char **argv) > > return -1; > > } > > > > +static bool batchsize_enabled(int argc, char *argv[]) { > > + if (argc < 2) > > + return false; > > + if ((strcmp(argv[0], "filter") && strcmp(argv[0], "action")) > > + || (strcmp(argv[1], "add") && strcmp(argv[1], "delete") > > + && strcmp(argv[1], "change") && strcmp(argv[1], "replace"))) > > + return false; > > + > > + return true; > > +} > > + > > +struct batch_buf { > > + struct batch_buf *next; > > + char buf[16420]; /* sizeof (struct nlmsghdr) + > > + sizeof (struct tcmsg) + > > + MAX_MSG */ > > +}; > > + > > static int batch(const char *name) > > { > > - char *line = NULL; > > + struct batch_buf *head = NULL, *tail = NULL, *buf; > > Hi. Overall it looks much better IMHO. > > Maybe declare *buf in the two sections that it is used, to make it clear that > it > cannot be reused between them? (minor, yes..) Done. > > > + char *largv[100], *largv_next[100]; > > A define for the 100 is probably welcomed, > > > + char *line, *line_next = NULL; > > + bool bs_enabled_next = false; > > + bool bs_enabled = false; > > + bool lastline = false; > > + int largc, largc_next; > > + bool bs_enabled_saved; > > + int batchsize = 0; > > size_t len = 0; > > - int ret = 0; > > + int ret = 0, i; > > + bool send; > > > > batch_mode = 1; > > if (name && strcmp(name, "-") != 0) { @@ -240,23 +268,92 @@ static > > int batch(const char *name) > > } > > > > cmdlineno = 0; > > - while (getcmdline(&line, &len, stdin) != -1) { > > - char *largv[100]; > > - int largc; > > + if (getcmdline(&line, &len, stdin) == -1) > > + goto Exit; > > + largc = makeargs(line, largv, 100); > > + bs_enabled = batchsize_enabled(largc, largv); > > + bs_enabled_saved = bs_enabled; > > + do { > > + if (getcmdline(&line_next, &len, stdin) == -1) > > + lastline = true; > > + > > + largc_next = makeargs(line_next, largv_next, 100); > > + bs_enabled_next = batchsize_enabled(largc_next, > largv_next); > > + if (bs_enabled) { > > + buf = calloc(1, sizeof (struct batch_buf)); > > if (!buf) .. ? > > > + if (head == NULL) { > > + head = tail = buf; > > + buf->next = NULL; > > this > > > + } else { > > + buf->next = NULL; > > and this NULL assignments are not needed because you used calloc(), so it's > already zeroed. Done. > > > + tail->next = buf; > > + tail = buf; > > + } > > + ++batchsize; > > + } > > + > > + /* > > + * In batch mode, if we haven't accumulated enough > commands > > + * and this is not the last command and this command & next > > + * command both support the batchsize feature, don't send > the > > + * message immediately. > > + */ > > + if (!lastline && bs_enabled && bs_enabled_next > > + && batchsize != MSG_IOV_MAX) > > + send = false; > > + else > > + send = true; > > + > > + line = line_next; > > + line_next = NULL; > > + len = 0; > > + bs_enabled_saved = bs_enabled; > > + bs_enabled = bs_enabled_next; > > + bs_enabled_next = false; > > > > - largc = makeargs(line, largv, 100); > > if (largc == 0) > > continue; /* blank line */ > > > > - if (do_cmd(largc, largv)) { > > - fprintf(stderr, "Command failed %s:%d\n", name, > cmdlineno); > > + ret = do_cmd(largc, largv, tail == NULL ? NULL : tail->buf); > > + if (ret != 0) { > > + fprintf(stderr, "Command failed %s:%d\n", name, > > + cmdlineno - 1); > > ret = 1; > > if (!force) > > break; > > } > > - } > > - if (line) > > - free(line); > > + largc = largc_next; > > + for (i = 0; i < largc; i ++) > > + largv[i] = largv_next[i]; > > This loop can be a single memcpy() > memcpy(largv, largv_next, sizeof(largv)); Done. I changed it to: memcpy(largv, largv_next, largc * sizeof(char *)); > > > + > > + if (send && bs_enabled_saved) { > > + struct iovec *iov, *iovs; > > + struct nlmsghdr *n; > > + iov = iovs = malloc(batchsize * sizeof (struct iovec)); > > + for (buf = head; buf != NULL; buf = buf->next, ++iov) > { > > + n = (struct nlmsghdr *)&buf->buf; > > + iov->iov_base = n; > > + iov->iov_len = n->nlmsg_len; > > + } > > + > > + ret = rtnl_talk_iov(&rth, iovs, batchsize, NULL); > > + if (ret < 0) { > > + fprintf(stderr, "Command failed %s:%d\n", > name, > > + cmdlineno - (batchsize + ret) - 1); > > + return 2; > > + } > > + for (buf = head; buf != NULL; buf = head) { > > + head = buf->next; > > + free(buf); > > Maybe we could reuse these buffers? Put them in another list when free > and pull from it when allocating. And allocate a new one if the recycle pool > is > empty. But maybe glibc already does some work for that. I thought slab could deal with it properly. But after changing the code as you suggested, the performance is as good as before. There was a little performance penalty while allocating the memory dynamically.
Now it takes about 12.5 seconds to insert 1 million rules. real 0m12.531s user 0m6.221s sys 0m6.246s > > > That's all my comments in this patchset. Thanks Chris. Thanks for all your comments. That really helps me a lot. > > > + } > > + head = tail = NULL; > > + batchsize = 0; > > + free(iovs); > > + } > > + } while (!lastline); > > + > > +Exit: > > + free(line); > > > > rtnl_close(&rth); > > return ret; > > @@ -341,7 +438,7 @@ int main(int argc, char **argv) > > goto Exit; > > } > > > > - ret = do_cmd(argc-1, argv+1); > > + ret = do_cmd(argc-1, argv+1, NULL); > > Exit: > > rtnl_close(&rth); > > > > diff --git a/tc/tc_common.h b/tc/tc_common.h index 264fbdac..59018da5 > > 100644 > > --- a/tc/tc_common.h > > +++ b/tc/tc_common.h > > @@ -1,13 +1,14 @@ > > /* SPDX-License-Identifier: GPL-2.0 */ > > > > #define TCA_BUF_MAX (64*1024) > > +#define MSG_IOV_MAX 128 > > > > extern struct rtnl_handle rth; > > > > extern int do_qdisc(int argc, char **argv); extern int do_class(int > > argc, char **argv); -extern int do_filter(int argc, char **argv); > > -extern int do_action(int argc, char **argv); > > +extern int do_filter(int argc, char **argv, void *buf); extern int > > +do_action(int argc, char **argv, void *buf); > > extern int do_tcmonitor(int argc, char **argv); extern int > > do_exec(int argc, char **argv); > > > > diff --git a/tc/tc_filter.c b/tc/tc_filter.c index 545cc3a1..7db4964b > > 100644 > > --- a/tc/tc_filter.c > > +++ b/tc/tc_filter.c > > @@ -42,28 +42,38 @@ static void usage(void) > > "OPTIONS := ... try tc filter add <desired FILTER_KIND> > help\n"); > > } > > > > -static int tc_filter_modify(int cmd, unsigned int flags, int argc, > > char **argv) > > +struct tc_filter_req { > > + struct nlmsghdr n; > > + struct tcmsg t; > > + char buf[MAX_MSG]; > > +}; > > + > > +static int tc_filter_modify(int cmd, unsigned int flags, int argc, char > > **argv, > > + void *buf) > > { > > - struct { > > - struct nlmsghdr n; > > - struct tcmsg t; > > - char buf[MAX_MSG]; > > - } req = { > > - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), > > - .n.nlmsg_flags = NLM_F_REQUEST | flags, > > - .n.nlmsg_type = cmd, > > - .t.tcm_family = AF_UNSPEC, > > - }; > > + struct tc_filter_req *req, filter_req; > > struct filter_util *q = NULL; > > - __u32 prio = 0; > > - __u32 protocol = 0; > > - int protocol_set = 0; > > - __u32 chain_index; > > + struct tc_estimator est = {}; > > + char k[FILTER_NAMESZ] = {}; > > int chain_index_set = 0; > > + char d[IFNAMSIZ] = {}; > > + int protocol_set = 0; > > char *fhandle = NULL; > > - char d[IFNAMSIZ] = {}; > > - char k[FILTER_NAMESZ] = {}; > > - struct tc_estimator est = {}; > > + __u32 protocol = 0; > > + __u32 chain_index; > > + struct iovec iov; > > + __u32 prio = 0; > > + int ret; > > + > > + if (buf) > > + req = buf; > > + else > > + req = &filter_req; > > + > > + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); > > + req->n.nlmsg_flags = NLM_F_REQUEST | flags; > > + req->n.nlmsg_type = cmd; > > + req->t.tcm_family = AF_UNSPEC; > > > > if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE) > > protocol = htons(ETH_P_ALL); > > @@ -75,37 +85,37 @@ static int tc_filter_modify(int cmd, unsigned int flags, > int argc, char **argv) > > duparg("dev", *argv); > > strncpy(d, *argv, sizeof(d)-1); > > } else if (strcmp(*argv, "root") == 0) { > > - if (req.t.tcm_parent) { > > + if (req->t.tcm_parent) { > > fprintf(stderr, > > "Error: \"root\" is duplicate parent > ID\n"); > > return -1; > > } > > - req.t.tcm_parent = TC_H_ROOT; > > + req->t.tcm_parent = TC_H_ROOT; > > } else if (strcmp(*argv, "ingress") == 0) { > > - if (req.t.tcm_parent) { > > + if (req->t.tcm_parent) { > > fprintf(stderr, > > "Error: \"ingress\" is duplicate parent > ID\n"); > > return -1; > > } > > - req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, > > + req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, > > TC_H_MIN_INGRESS); > > } else if (strcmp(*argv, "egress") == 0) { > > - if (req.t.tcm_parent) { > > + if (req->t.tcm_parent) { > > fprintf(stderr, > > "Error: \"egress\" is duplicate parent > ID\n"); > > return -1; > > } > > - req.t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, > > + req->t.tcm_parent = TC_H_MAKE(TC_H_CLSACT, > > TC_H_MIN_EGRESS); > > } else if (strcmp(*argv, "parent") == 0) { > > __u32 handle; > > > > NEXT_ARG(); > > - if (req.t.tcm_parent) > > + if (req->t.tcm_parent) > > duparg("parent", *argv); > > if (get_tc_classid(&handle, *argv)) > > invarg("Invalid parent ID", *argv); > > - req.t.tcm_parent = handle; > > + req->t.tcm_parent = handle; > > } else if (strcmp(*argv, "handle") == 0) { > > NEXT_ARG(); > > if (fhandle) > > @@ -152,26 +162,26 @@ static int tc_filter_modify(int cmd, unsigned int > flags, int argc, char **argv) > > argc--; argv++; > > } > > > > - req.t.tcm_info = TC_H_MAKE(prio<<16, protocol); > > + req->t.tcm_info = TC_H_MAKE(prio<<16, protocol); > > > > if (chain_index_set) > > - addattr32(&req.n, sizeof(req), TCA_CHAIN, chain_index); > > + addattr32(&req->n, sizeof(*req), TCA_CHAIN, chain_index); > > > > if (k[0]) > > - addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1); > > + addattr_l(&req->n, sizeof(*req), TCA_KIND, k, strlen(k)+1); > > > > if (d[0]) { > > ll_init_map(&rth); > > > > - req.t.tcm_ifindex = ll_name_to_index(d); > > - if (req.t.tcm_ifindex == 0) { > > + req->t.tcm_ifindex = ll_name_to_index(d); > > + if (req->t.tcm_ifindex == 0) { > > fprintf(stderr, "Cannot find device \"%s\"\n", d); > > return 1; > > } > > } > > > > if (q) { > > - if (q->parse_fopt(q, fhandle, argc, argv, &req.n)) > > + if (q->parse_fopt(q, fhandle, argc, argv, &req->n)) > > return 1; > > } else { > > if (fhandle) { > > @@ -190,10 +200,17 @@ static int tc_filter_modify(int cmd, unsigned int > flags, int argc, char **argv) > > } > > > > if (est.ewma_log) > > - addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); > > + addattr_l(&req->n, sizeof(*req), TCA_RATE, &est, > sizeof(est)); > > > > - if (rtnl_talk(&rth, &req.n, NULL) < 0) { > > - fprintf(stderr, "We have an error talking to the kernel\n"); > > + iov.iov_base = &req->n; > > + iov.iov_len = req->n.nlmsg_len; > > + > > + if (buf) > > + return 0; > > + > > + ret = rtnl_talk_iov(&rth, &iov, 1, NULL); > > + if (ret < 0) { > > + fprintf(stderr, "We have an error talking to the kernel, %d\n", > > +ret); > > return 2; > > } > > > > @@ -636,20 +653,20 @@ static int tc_filter_list(int argc, char **argv) > > return 0; > > } > > > > -int do_filter(int argc, char **argv) > > +int do_filter(int argc, char **argv, void *buf) > > { > > if (argc < 1) > > return tc_filter_list(0, NULL); > > if (matches(*argv, "add") == 0) > > return tc_filter_modify(RTM_NEWTFILTER, > NLM_F_EXCL|NLM_F_CREATE, > > - argc-1, argv+1); > > + argc-1, argv+1, buf); > > if (matches(*argv, "change") == 0) > > - return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1); > > + return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1, > buf); > > if (matches(*argv, "replace") == 0) > > return tc_filter_modify(RTM_NEWTFILTER, NLM_F_CREATE, > argc-1, > > - argv+1); > > + argv+1, buf); > > if (matches(*argv, "delete") == 0) > > - return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1); > > + return tc_filter_modify(RTM_DELTFILTER, 0, argc-1, argv+1, > buf); > > if (matches(*argv, "get") == 0) > > return tc_filter_get(RTM_GETTFILTER, 0, argc-1, argv+1); > > if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 > > -- > > 2.14.3 > >