Hi Xiaoyun, > -----Original Message----- > From: dev <dev-boun...@dpdk.org> On Behalf Of Xiaoyun wang > Sent: Wednesday, September 25, 2019 10:31 PM > To: ferruh.yi...@intel.com > Cc: dev@dpdk.org; xuanziya...@huawei.com; shahar.bel...@huawei.com; > luoxian...@huawei.com; tanya.brokh...@huawei.com; > zhouguoy...@huawei.com; wuli...@huawei.com; Xiaoyun wang > <cloud.wangxiao...@huawei.com> > Subject: [dpdk-dev] [PATCH v2 17/17] net/hinic: optimize tx&rx > performance > > This patch optimizes receive packets performance > in arm platform. > > Signed-off-by: Xiaoyun wang <cloud.wangxiao...@huawei.com> > --- > drivers/net/hinic/hinic_pmd_rx.c | 17 +++++++++++++++++ > drivers/net/hinic/hinic_pmd_rx.h | 11 +++++++++++ > 2 files changed, 28 insertions(+) > > diff --git a/drivers/net/hinic/hinic_pmd_rx.c > b/drivers/net/hinic/hinic_pmd_rx.c > index 37b4f5c..94071ee 100644 > --- a/drivers/net/hinic/hinic_pmd_rx.c > +++ b/drivers/net/hinic/hinic_pmd_rx.c > @@ -950,6 +950,19 @@ void hinic_rx_alloc_pkts(struct hinic_rxq *rxq) > } > } > > +#if defined(__ARM64_NEON__) No NEON intrinsics are used here, so RTE_ARCH_ARM64 may be a better guard. In the following line, __rte_always_inline is what DPDK commonly uses; the effect is the same. /Gavin
> +static inline uint32_t __attribute__((always_inline)) > +hinic_read_cqe_status(uintptr_t addr) > +{ > + uint32_t val; > + > + asm volatile("ldar %x[val], [%x[addr]]" > + : [val] "=r" (val) > + : [addr] "r" (addr)); > + return val; > +} > +#endif I understand your intention is that the read of the status is observed before the subsequent reads. This can be achieved with __atomic_load_n(...) using __ATOMIC_ACQUIRE semantics. This C11-style approach works on all architectures, so you don't need to differentiate between them. /Gavin > + > u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 > nb_pkts) > { > struct rte_mbuf *rxm; > @@ -972,7 +985,11 @@ u16 hinic_recv_pkts(void *rx_queue, struct > rte_mbuf **rx_pkts, u16 nb_pkts) > while (pkts < nb_pkts) { > /* 2. current ci is done */ > rx_cqe = &rxq->rx_cqe[sw_ci]; > +#if defined(__X86_64_SSE__) > status = rx_cqe->status; > +#elif defined(__ARM64_NEON__) > + status = hinic_read_cqe_status((uintptr_t)&rxq- > >rx_cqe[sw_ci]); > +#endif > if (!HINIC_GET_RX_DONE_BE(status)) > break; > > diff --git a/drivers/net/hinic/hinic_pmd_rx.h > b/drivers/net/hinic/hinic_pmd_rx.h > index fe2735b..fa27e91 100644 > --- a/drivers/net/hinic/hinic_pmd_rx.h > +++ b/drivers/net/hinic/hinic_pmd_rx.h > @@ -28,6 +28,7 @@ struct hinic_rq_ctrl { > u32 ctrl_fmt; > }; > > +#if defined(__X86_64_SSE__) > struct hinic_rq_cqe { > u32 status; > u32 vlan_len; > @@ -36,6 +37,16 @@ struct hinic_rq_cqe { > > u32 rsvd[4]; > }; > +#elif defined(__ARM64_NEON__) > +struct hinic_rq_cqe { > + u32 status; > + u32 vlan_len; > + u32 offload_type; > + u32 rss_hash; > + > + u32 rsvd[4]; > +} __rte_cache_aligned; > +#endif > > struct hinic_rq_cqe_sect { > struct hinic_sge sge; > -- > 1.8.3.1