From: Pavan Nikhilesh
Date: Tue, 29 Jun 2021 07:44:19 +0000 (+0530)
Subject: net/cnxk: enable PTP processing in vector Rx
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=7c6bee34064f59fe572d129223e9734bb78025ad;p=dpdk.git

net/cnxk: enable PTP processing in vector Rx

Enable PTP offload in the vector Rx burst function: process mbufs on the
vector path and switch to scalar code only when extracting the timestamp.

Signed-off-by: Pavan Nikhilesh
Acked-by: Nithin Dabilpuram
---

diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index b079edbd35..7caec6cf14 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
 	if (nix_recalc_mtu(eth_dev))
 		plt_err("Failed to set MTU size for ptp");
 
-	dev->scalar_ena = true;
 	dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
 
 	/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 3a9fd71309..69e767ac3d 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 	dev->rx_pkt_burst_no_offload =
 		nix_eth_rx_burst_mseg[0][0][0][0][0][0];
 
-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+	if (dev->scalar_ena) {
 		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
 			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
 		return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5926ff7f46..d9572b19e7 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
 
 static __rte_always_inline void
 nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
-		    uint64_t rearm)
+		    uint64_t rearm, const uint16_t flags)
 {
 	const rte_iova_t *iova_list;
 	struct rte_mbuf *head;
@@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
 		return;
 	}
 
-	mbuf->pkt_len = rx->pkt_lenm1 + 1;
-	mbuf->data_len = sg & 0xFFFF;
+	mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+					       CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+	mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+					  CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
 	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;
 
@@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 	*(uint64_t *)(&mbuf->rearm_data) = val;
 
 	if (flag & NIX_RX_MULTI_SEG_F)
-		nix_cqe_xtract_mseg(rx, mbuf, val);
+		nix_cqe_xtract_mseg(rx, mbuf, val, flag);
 	else
 		mbuf->next = NULL;
 }
@@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
 				      flags);
 		cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
 					(flags & NIX_RX_OFFLOAD_TSTAMP_F),
-					(uint64_t *)((uint8_t *)mbuf + data_off)
-					);
+					(flags & NIX_RX_MULTI_SEG_F),
+					(uint64_t *)((uint8_t *)mbuf +
+						     data_off));
 		rx_pkts[packets++] = mbuf;
 		roc_prefetch_store_keep(mbuf);
 		head++;
@@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 					 mbuf3);
 		}
 
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+			const uint16x8_t len_off = {
+				0,			     /* ptype   0:15 */
+				0,			     /* ptype  16:32 */
+				CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen  0:15 */
+				0,			     /* pktlen 16:32 */
+				CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+				0,
+				0,
+				0};
+			const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC};
+			const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+						PKT_RX_IEEE1588_TMST |
+						rxq->tstamp->rx_tstamp_dynflag;
+			const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+			uint64x2_t ts01, ts23, mask;
+			uint64_t ts[4];
+			uint8_t res;
+
+			/* Subtract timesync length from total pkt length. */
+			f0 = vsubq_u16(f0, len_off);
+			f1 = vsubq_u16(f1, len_off);
+			f2 = vsubq_u16(f2, len_off);
+			f3 = vsubq_u16(f3, len_off);
+
+			/* Get the address of actual timestamp. */
+			ts01 = vaddq_u64(mbuf01, data_off);
+			ts23 = vaddq_u64(mbuf23, data_off);
+			/* Load timestamp from address. */
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+									  0),
+					      ts01, 0);
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+									  1),
+					      ts01, 1);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+									  0),
+					      ts23, 0);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+									  1),
+					      ts23, 1);
+			/* Convert from be to cpu byteorder. */
+			ts01 = vrev64q_u8(ts01);
+			ts23 = vrev64q_u8(ts23);
+			/* Store timestamp into scalar for later use. */
+			ts[0] = vgetq_lane_u64(ts01, 0);
+			ts[1] = vgetq_lane_u64(ts01, 1);
+			ts[2] = vgetq_lane_u64(ts23, 0);
+			ts[3] = vgetq_lane_u64(ts23, 1);
+
+			/* Store timestamp into dynfield. */
+			*cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+				ts[0];
+			*cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+				ts[1];
+			*cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+				ts[2];
+			*cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+				ts[3];
+
+			/* Generate ptype mask to filter L2 ether timesync */
+			mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+			mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+			/* Match against L2 ether timesync. */
+			mask = vceqq_u32(mask, ptype);
+			/* Convert from vector to scalar mask. */
+			res = vaddvq_u32(vandq_u32(mask, and_mask));
+			res &= 0xF;
+
+			if (res) {
+				/* Fill in the ol_flags for any packets that
+				 * matched.
+				 */
+				ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+				ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+				ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+				ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+				/* Update Rxq timestamp with the latest
+				 * timestamp.
+				 */
+				rxq->tstamp->rx_ready = 1;
+				rxq->tstamp->rx_tstamp =
+					ts[31 - __builtin_clz(res)];
+			}
+		}
+
 		/* Form rearm_data with ol_flags */
 		rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
 		rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -496,17 +592,17 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 			 * individual mbufs in scalar mode.
 			 */
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(0) + 8), mbuf0,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(1) + 8), mbuf1,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(2) + 8), mbuf2,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(3) + 8), mbuf3,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer, flags);
 		} else {
 			/* Update that no more segments */
 			mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn10k_rx_vec.c b/drivers/net/cnxk/cn10k_rx_vec.c
index 65ffa97841..93528a44f9 100644
--- a/drivers/net/cnxk/cn10k_rx_vec.c
+++ b/drivers/net/cnxk/cn10k_rx_vec.c
@@ -11,9 +11,6 @@
 					       struct rte_mbuf **rx_pkts,      \
 					       uint16_t pkts)                  \
 	{                                                                      \
-		/* TSTMP is not supported by vector */                         \
-		if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F)                         \
-			return 0;                                              \
 		return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
 						  (flags));                    \
 	}
diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c
index 994fdb7c3b..115e678916 100644
--- a/drivers/net/cnxk/cn9k_ethdev.c
+++ b/drivers/net/cnxk/cn9k_ethdev.c
@@ -309,7 +309,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
 	if (nix_recalc_mtu(eth_dev))
 		plt_err("Failed to set MTU size for ptp");
 
-	dev->scalar_ena = true;
 	dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
 
 	/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index d293d4eac3..7d9f1bd61f 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -75,10 +75,7 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 	dev->rx_pkt_burst_no_offload =
 		nix_eth_rx_burst_mseg[0][0][0][0][0][0];
 
-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+	if (dev->scalar_ena) {
 		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
 			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
 		return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 5ae9e8195c..beb52f39d5 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -110,7 +110,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t ol_flags,
 
 static __rte_always_inline void
 nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
-		    uint64_t rearm)
+		    uint64_t rearm, const uint16_t flags)
 {
 	const rte_iova_t *iova_list;
 	struct rte_mbuf *head;
@@ -126,8 +126,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
 		return;
 	}
 
-	mbuf->pkt_len = rx->pkt_lenm1 + 1;
-	mbuf->data_len = sg & 0xFFFF;
+	mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+					       CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+	mbuf->data_len = (sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+					  CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
 	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;
 
@@ -210,7 +212,7 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 	*(uint64_t *)(&mbuf->rearm_data) = val;
 
 	if (flag & NIX_RX_MULTI_SEG_F)
-		nix_cqe_xtract_mseg(rx, mbuf, val);
+		nix_cqe_xtract_mseg(rx, mbuf, val, flag);
 	else
 		mbuf->next = NULL;
 }
@@ -275,8 +277,9 @@ cn9k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
 				      flags);
 		cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
 					(flags & NIX_RX_OFFLOAD_TSTAMP_F),
-					(uint64_t *)((uint8_t *)mbuf + data_off)
-					);
+					(flags & NIX_RX_MULTI_SEG_F),
+					(uint64_t *)((uint8_t *)mbuf +
+						     data_off));
 		rx_pkts[packets++] = mbuf;
 		roc_prefetch_store_keep(mbuf);
 		head++;
@@ -472,6 +475,99 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 					 mbuf3);
 		}
 
+		if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+			const uint16x8_t len_off = {
+				0,			     /* ptype   0:15 */
+				0,			     /* ptype  16:32 */
+				CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen  0:15 */
+				0,			     /* pktlen 16:32 */
+				CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+				0,
+				0,
+				0};
+			const uint32x4_t ptype = {RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC,
+						  RTE_PTYPE_L2_ETHER_TIMESYNC};
+			const uint64_t ts_olf = PKT_RX_IEEE1588_PTP |
+						PKT_RX_IEEE1588_TMST |
+						rxq->tstamp->rx_tstamp_dynflag;
+			const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
+			uint64x2_t ts01, ts23, mask;
+			uint64_t ts[4];
+			uint8_t res;
+
+			/* Subtract timesync length from total pkt length. */
+			f0 = vsubq_u16(f0, len_off);
+			f1 = vsubq_u16(f1, len_off);
+			f2 = vsubq_u16(f2, len_off);
+			f3 = vsubq_u16(f3, len_off);
+
+			/* Get the address of actual timestamp. */
+			ts01 = vaddq_u64(mbuf01, data_off);
+			ts23 = vaddq_u64(mbuf23, data_off);
+			/* Load timestamp from address. */
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+									  0),
+					      ts01, 0);
+			ts01 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts01,
+									  1),
+					      ts01, 1);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+									  0),
+					      ts23, 0);
+			ts23 = vsetq_lane_u64(*(uint64_t *)vgetq_lane_u64(ts23,
+									  1),
+					      ts23, 1);
+			/* Convert from be to cpu byteorder. */
+			ts01 = vrev64q_u8(ts01);
+			ts23 = vrev64q_u8(ts23);
+			/* Store timestamp into scalar for later use. */
+			ts[0] = vgetq_lane_u64(ts01, 0);
+			ts[1] = vgetq_lane_u64(ts01, 1);
+			ts[2] = vgetq_lane_u64(ts23, 0);
+			ts[3] = vgetq_lane_u64(ts23, 1);
+
+			/* Store timestamp into dynfield. */
+			*cnxk_nix_timestamp_dynfield(mbuf0, rxq->tstamp) =
+				ts[0];
+			*cnxk_nix_timestamp_dynfield(mbuf1, rxq->tstamp) =
+				ts[1];
+			*cnxk_nix_timestamp_dynfield(mbuf2, rxq->tstamp) =
+				ts[2];
+			*cnxk_nix_timestamp_dynfield(mbuf3, rxq->tstamp) =
+				ts[3];
+
+			/* Generate ptype mask to filter L2 ether timesync */
+			mask = vdupq_n_u32(vgetq_lane_u32(f0, 0));
+			mask = vsetq_lane_u32(vgetq_lane_u32(f1, 0), mask, 1);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f2, 0), mask, 2);
+			mask = vsetq_lane_u32(vgetq_lane_u32(f3, 0), mask, 3);
+
+			/* Match against L2 ether timesync. */
+			mask = vceqq_u32(mask, ptype);
+			/* Convert from vector to scalar mask. */
+			res = vaddvq_u32(vandq_u32(mask, and_mask));
+			res &= 0xF;
+
+			if (res) {
+				/* Fill in the ol_flags for any packets that
+				 * matched.
+				 */
+				ol_flags0 |= ((res & 0x1) ? ts_olf : 0);
+				ol_flags1 |= ((res & 0x2) ? ts_olf : 0);
+				ol_flags2 |= ((res & 0x4) ? ts_olf : 0);
+				ol_flags3 |= ((res & 0x8) ? ts_olf : 0);
+
+				/* Update Rxq timestamp with the latest
+				 * timestamp.
+				 */
+				rxq->tstamp->rx_ready = 1;
+				rxq->tstamp->rx_tstamp =
+					ts[31 - __builtin_clz(res)];
+			}
+		}
+
 		/* Form rearm_data with ol_flags */
 		rearm0 = vsetq_lane_u64(ol_flags0, rearm0, 1);
 		rearm1 = vsetq_lane_u64(ol_flags1, rearm1, 1);
@@ -499,17 +595,17 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 			 * individual mbufs in scalar mode.
 			 */
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(0) + 8), mbuf0,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(1) + 8), mbuf1,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(2) + 8), mbuf2,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer, flags);
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
-						(cq0 + CQE_SZ(3) + 8), mbuf3,
-					    mbuf_initializer);
+						(cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer, flags);
 		} else {
 			/* Update that no more segments */
 			mbuf0->next = NULL;
diff --git a/drivers/net/cnxk/cn9k_rx_vec.c b/drivers/net/cnxk/cn9k_rx_vec.c
index e61c2225c6..ef5f771ef7 100644
--- a/drivers/net/cnxk/cn9k_rx_vec.c
+++ b/drivers/net/cnxk/cn9k_rx_vec.c
@@ -9,9 +9,6 @@
 uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(              \
 	void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)              \
 	{                                                                      \
-		/* TSTMP is not supported by vector */                         \
-		if ((flags) & NIX_RX_OFFLOAD_TSTAMP_F)                         \
-			return 0;                                              \
 		return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
 						 (flags));                     \
 	}
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 67b1f42531..4eead03905 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -136,13 +136,12 @@ struct cnxk_eth_qconf {
 };
 
 struct cnxk_timesync_info {
+	uint8_t rx_ready;
+	uint64_t rx_tstamp;
 	uint64_t rx_tstamp_dynflag;
+	int tstamp_dynfield_offset;
 	rte_iova_t tx_tstamp_iova;
 	uint64_t *tx_tstamp;
-	uint64_t rx_tstamp;
-	int tstamp_dynfield_offset;
-	uint8_t tx_ready;
-	uint8_t rx_ready;
 } __plt_cache_aligned;
 
 struct cnxk_eth_dev {
@@ -465,13 +464,15 @@ cnxk_nix_timestamp_dynfield(struct rte_mbuf *mbuf,
 
 static __rte_always_inline void
 cnxk_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf,
-			struct cnxk_timesync_info *tstamp, bool ts_enable,
+			struct cnxk_timesync_info *tstamp,
+			const uint8_t ts_enable, const uint8_t mseg_enable,
 			uint64_t *tstamp_ptr)
 {
-	if (ts_enable &&
-	    (mbuf->data_off ==
-	     RTE_PKTMBUF_HEADROOM + CNXK_NIX_TIMESYNC_RX_OFFSET)) {
-		mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+	if (ts_enable) {
+		if (!mseg_enable) {
+			mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+			mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+		}
 
 		/* Reading the rx timestamp inserted by CGX, viz at
 		 * starting of the packet data.
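
Note: the following is a minimal standalone sketch (not part of the patch) of
the NEON idiom the vector Rx path above uses to gather four big-endian 64-bit
CGX timestamps from packet headroom. The helper name and pointer parameters
are illustrative only: they stand in for the per-mbuf
"(uint8_t *)mbuf + data_off" addresses the driver computes, and explicit
vreinterpretq casts are used so the sketch compiles as strict C.

#include <stdint.h>
#include <arm_neon.h>

static inline void
load_four_be64_tstamps(const uint64_t *a0, const uint64_t *a1,
		       const uint64_t *a2, const uint64_t *a3, uint64_t ts[4])
{
	uint64x2_t ts01 = vdupq_n_u64(0);
	uint64x2_t ts23 = vdupq_n_u64(0);

	/* Gather two timestamps per vector with lane loads. */
	ts01 = vsetq_lane_u64(*a0, ts01, 0);
	ts01 = vsetq_lane_u64(*a1, ts01, 1);
	ts23 = vsetq_lane_u64(*a2, ts23, 0);
	ts23 = vsetq_lane_u64(*a3, ts23, 1);

	/* Byte-reverse each 64-bit lane: big-endian wire order -> CPU. */
	ts01 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(ts01)));
	ts23 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(ts23)));

	/* Spill to scalars for the dynfield stores and rx_tstamp update. */
	ts[0] = vgetq_lane_u64(ts01, 0);
	ts[1] = vgetq_lane_u64(ts01, 1);
	ts[2] = vgetq_lane_u64(ts23, 0);
	ts[3] = vgetq_lane_u64(ts23, 1);
}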
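
Note: a second sketch (again with an illustrative helper name, not driver API)
of how the patch collapses the vceqq_u32() PTYPE compare result into a 4-bit
scalar mask and then picks the newest timestamp. Each matched lane is ANDed
with a distinct power-of-two weight, so the horizontal add vaddvq_u32() yields
a bitmask where bit i marks packet i as PTP; 31 - __builtin_clz(res) is then
the highest set bit, i.e. the last PTP packet of the batch of four.

#include <stdint.h>
#include <arm_neon.h>

static inline uint64_t
ptp_olflags_and_latest(uint32x4_t eq_mask, const uint64_t ts[4],
		       uint64_t ts_olf, uint64_t olf[4], uint8_t *rx_ready)
{
	/* Per-lane weights so a horizontal add yields a bitmask. */
	const uint32x4_t and_mask = {0x1, 0x2, 0x4, 0x8};
	uint8_t res = vaddvq_u32(vandq_u32(eq_mask, and_mask)) & 0xF;

	if (!res)
		return 0;

	/* Set the PTP ol_flags only on the packets that matched. */
	olf[0] |= (res & 0x1) ? ts_olf : 0;
	olf[1] |= (res & 0x2) ? ts_olf : 0;
	olf[2] |= (res & 0x4) ? ts_olf : 0;
	olf[3] |= (res & 0x8) ? ts_olf : 0;

	*rx_ready = 1;
	/* Highest set bit indexes the most recently received PTP packet. */
	return ts[31 - __builtin_clz(res)];
}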