Hi Xiaoyun,

> -----Original Message-----
> From: dev <dev-boun...@dpdk.org> On Behalf Of Xiaoyun wang
> Sent: Wednesday, September 25, 2019 10:31 PM
> To: ferruh.yi...@intel.com
> Cc: dev@dpdk.org; xuanziya...@huawei.com; shahar.bel...@huawei.com;
> luoxian...@huawei.com; tanya.brokh...@huawei.com;
> zhouguoy...@huawei.com; wuli...@huawei.com; Xiaoyun wang
> <cloud.wangxiao...@huawei.com>
> Subject: [dpdk-dev] [PATCH v2 17/17] net/hinic: optimize tx&rx
> performance
> 
> This patch optimizes receive packets performance
> in arm platform.
> 
> Signed-off-by: Xiaoyun wang <cloud.wangxiao...@huawei.com>
> ---
>  drivers/net/hinic/hinic_pmd_rx.c | 17 +++++++++++++++++
>  drivers/net/hinic/hinic_pmd_rx.h | 11 +++++++++++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/drivers/net/hinic/hinic_pmd_rx.c
> b/drivers/net/hinic/hinic_pmd_rx.c
> index 37b4f5c..94071ee 100644
> --- a/drivers/net/hinic/hinic_pmd_rx.c
> +++ b/drivers/net/hinic/hinic_pmd_rx.c
> @@ -950,6 +950,19 @@ void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)
>       }
>  }
> 
> +#if defined(__ARM64_NEON__)
No NEON intrinsics are used here, so RTE_ARCH_ARM64 may be a better guard.
On the following line, __rte_always_inline is what is commonly used in DPDK;
the effect is the same.
/Gavin

> +static inline uint32_t __attribute__((always_inline))
> +hinic_read_cqe_status(uintptr_t addr)
> +{
> +     uint32_t val;
> +
> +     asm volatile("ldar %x[val], [%x[addr]]"
> +             : [val] "=r" (val)
> +             : [addr] "r" (addr));
> +     return val;
> +}
> +#endif
I understand that your intention is for the read of the status to be observed
before the following reads.
This can be achieved with __atomic_load_n(...) using __ATOMIC_ACQUIRE semantics,
e.g. status = __atomic_load_n(&rx_cqe->status, __ATOMIC_ACQUIRE);
This C11-style approach applies to all architectures, so you don't need to
differentiate between them.
/Gavin
> +
>  u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16
> nb_pkts)
>  {
>       struct rte_mbuf *rxm;
> @@ -972,7 +985,11 @@ u16 hinic_recv_pkts(void *rx_queue, struct
> rte_mbuf **rx_pkts, u16 nb_pkts)
>       while (pkts < nb_pkts) {
>                /* 2. current ci is done */
>               rx_cqe = &rxq->rx_cqe[sw_ci];
> +#if defined(__X86_64_SSE__)
>               status = rx_cqe->status;
> +#elif defined(__ARM64_NEON__)
> +             status = hinic_read_cqe_status((uintptr_t)&rxq-
> >rx_cqe[sw_ci]);
> +#endif
>               if (!HINIC_GET_RX_DONE_BE(status))
>                       break;
> 
> diff --git a/drivers/net/hinic/hinic_pmd_rx.h
> b/drivers/net/hinic/hinic_pmd_rx.h
> index fe2735b..fa27e91 100644
> --- a/drivers/net/hinic/hinic_pmd_rx.h
> +++ b/drivers/net/hinic/hinic_pmd_rx.h
> @@ -28,6 +28,7 @@ struct hinic_rq_ctrl {
>       u32     ctrl_fmt;
>  };
> 
> +#if defined(__X86_64_SSE__)
>  struct hinic_rq_cqe {
>       u32 status;
>       u32 vlan_len;
> @@ -36,6 +37,16 @@ struct hinic_rq_cqe {
> 
>       u32 rsvd[4];
>  };
> +#elif defined(__ARM64_NEON__)
> +struct hinic_rq_cqe {
> +     u32 status;
> +     u32 vlan_len;
> +     u32 offload_type;
> +     u32 rss_hash;
> +
> +     u32 rsvd[4];
> +} __rte_cache_aligned;
> +#endif
> 
>  struct hinic_rq_cqe_sect {
>       struct hinic_sge        sge;
> --
> 1.8.3.1

Reply via email to