From: Pavan Nikhilesh Date: Tue, 29 Jun 2021 07:44:20 +0000 (+0530) Subject: net/cnxk: enable VLAN processing in vector Tx X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=3186a3a49c3a33502ba6189a80b8317c0a064830;p=dpdk.git net/cnxk: enable VLAN processing in vector Tx Enable VLAN offload in vector Tx burst function. Signed-off-by: Pavan Nikhilesh Acked-by: Nithin Dabilpuram --- diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c index 18694dc704..05bc163a40 100644 --- a/drivers/net/cnxk/cn10k_tx.c +++ b/drivers/net/cnxk/cn10k_tx.c @@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h index 8b1446f25c..1e16978584 100644 --- a/drivers/net/cnxk/cn10k_tx.h +++ b/drivers/net/cnxk/cn10k_tx.h @@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags) static __rte_always_inline uint8_t cn10k_nix_pkts_per_vec_brst(const uint16_t flags) { - RTE_SET_USED(flags); - /* We can pack up to 4 packets per LMTLINE if there are no offloads. */ - return 4 << ROC_LMT_LINES_PER_CORE_LOG2; + return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4) + << ROC_LMT_LINES_PER_CORE_LOG2; +} + +static __rte_always_inline uint8_t +cn10k_nix_tx_dwords_per_line(const uint16_t flags) +{ + return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8; } static __rte_always_inline uint64_t @@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags) static __rte_always_inline uint64_t cn10k_nix_tx_steor_vec_data(const uint16_t flags) { - const uint64_t dw_m1 = 0x7; + const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1; uint64_t data; - RTE_SET_USED(flags); /* This will be moved to addr area */ data = dw_m1; /* 15 vector sizes for single seg */ @@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; uint16_t left, scalar, burst, i, lmt_id; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn10k_eth_txq *txq = tx_queue; @@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0); sgdesc23_w0 = sgdesc01_w0; + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + } + /* Get LMT base address and LMT ID as lcore id */ ROC_LMT_BASE_ID_GET(laddr, lmt_id); left = pkts; @@ -738,6 +753,13 @@ again: senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1303,6 +1325,52 @@ again: senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. */ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1381,16 +1449,41 @@ again: cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - /* Store the prepared send desc to LMT lines */ - vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); - vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); - vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); - vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); - vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); - lnum += 1; + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]); + lnum += 1; + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]); + lnum += 1; + } else { + /* Store the prepared send desc to LMT lines */ + vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]); + vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]); + vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]); + vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]); + vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]); + lnum += 1; + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c index 7453f3bc98..beb5c649bb 100644 --- a/drivers/net/cnxk/cn10k_tx_vec.c +++ b/drivers/net/cnxk/cn10k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\ diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c index b802606075..4b43cdaff9 100644 --- a/drivers/net/cnxk/cn9k_tx.c +++ b/drivers/net/cnxk/cn9k_tx.c @@ -68,8 +68,7 @@ cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev) if (dev->scalar_ena || (dev->tx_offload_flags & - (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | - NIX_TX_OFFLOAD_TSO_F))) + (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F))) pick_tx_func(eth_dev, nix_eth_tx_burst); else pick_tx_func(eth_dev, nix_eth_tx_vec_burst); diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h index 1899d6670f..d5715bb52d 100644 --- a/drivers/net/cnxk/cn9k_tx.h +++ b/drivers/net/cnxk/cn9k_tx.h @@ -552,10 +552,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, { uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3; uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3; - uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP]; + uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP], + cmd2[NIX_DESCS_PER_LOOP]; uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3; uint64x2_t senddesc01_w0, senddesc23_w0; uint64x2_t senddesc01_w1, senddesc23_w1; + uint64x2_t sendext01_w0, sendext23_w0; + uint64x2_t sendext01_w1, sendext23_w1; uint64x2_t sgdesc01_w0, sgdesc23_w0; uint64x2_t sgdesc01_w1, sgdesc23_w1; struct cn9k_eth_txq *txq = tx_queue; @@ -585,8 +588,19 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; senddesc01_w1 = vdupq_n_u64(0); senddesc23_w1 = senddesc01_w1; - sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); - sgdesc23_w0 = sgdesc01_w0; + + /* Load command defaults into vector variables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sendext23_w0 = sendext01_w0; + sendext01_w1 = vdupq_n_u64(12 | 12U << 24); + sendext23_w1 = sendext01_w1; + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]); + sgdesc23_w0 = sgdesc01_w0; + } else { + sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]); + sgdesc23_w0 = sgdesc01_w0; + } for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) { /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */ @@ -597,6 +611,13 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc23_w0 = senddesc01_w0; sgdesc23_w0 = sgdesc01_w0; + /* Clear vlan enables. */ + if (flags & NIX_TX_NEED_EXT_HDR) { + sendext01_w1 = vbicq_u64(sendext01_w1, + vdupq_n_u64(0x3FFFF00FFFF00)); + sendext23_w1 = sendext01_w1; + } + /* Move mbufs to iova */ mbuf0 = (uint64_t *)tx_pkts[0]; mbuf1 = (uint64_t *)tx_pkts[1]; @@ -1162,6 +1183,52 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01); senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23); + if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) { + /* Tx ol_flag for vlan. */ + const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN}; + /* Bit enable for VLAN1 */ + const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)}; + /* Tx ol_flag for QnQ. */ + const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ}; + /* Bit enable for VLAN0 */ + const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)}; + /* Load vlan values from packet. outer is VLAN 0 */ + uint64x2_t ext01 = { + ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[0]->vlan_tci) << 32, + ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[1]->vlan_tci) << 32, + }; + uint64x2_t ext23 = { + ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[2]->vlan_tci) << 32, + ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 | + ((uint64_t)tx_pkts[3]->vlan_tci) << 32, + }; + + /* Get ol_flags of the packets. */ + xtmp128 = vzip1q_u64(len_olflags0, len_olflags1); + ytmp128 = vzip1q_u64(len_olflags2, len_olflags3); + + /* ORR vlan outer/inner values into cmd. */ + sendext01_w1 = vorrq_u64(sendext01_w1, ext01); + sendext23_w1 = vorrq_u64(sendext23_w1, ext23); + + /* Test for offload enable bits and generate masks. */ + xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv), + mlv), + vandq_u64(vtstq_u64(xtmp128, olq), + mlq)); + ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv), + mlv), + vandq_u64(vtstq_u64(ytmp128, olq), + mlq)); + + /* Set vlan enable bits into cmd based on mask. */ + sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128); + sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128); + } + if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) { /* Set don't free bit if reference count > 1 */ xmask01 = vdupq_n_u64(0); @@ -1247,17 +1314,50 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts, cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1); cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1); - do { - vst1q_u64(lmt_addr, cmd0[0]); - vst1q_u64(lmt_addr + 2, cmd1[0]); - vst1q_u64(lmt_addr + 4, cmd0[1]); - vst1q_u64(lmt_addr + 6, cmd1[1]); - vst1q_u64(lmt_addr + 8, cmd0[2]); - vst1q_u64(lmt_addr + 10, cmd1[2]); - vst1q_u64(lmt_addr + 12, cmd0[3]); - vst1q_u64(lmt_addr + 14, cmd1[3]); - lmt_status = roc_lmt_submit_ldeor(io_addr); - } while (lmt_status == 0); + if (flags & NIX_TX_NEED_EXT_HDR) { + cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1); + cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1); + cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1); + cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1); + } + + if (flags & NIX_TX_NEED_EXT_HDR) { + /* With ext header in the command we can no longer send + * all 4 packets together since LMTLINE is 128bytes. + * Split and Tx twice. + */ + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd2[0]); + vst1q_u64(lmt_addr + 4, cmd1[0]); + vst1q_u64(lmt_addr + 6, cmd0[1]); + vst1q_u64(lmt_addr + 8, cmd2[1]); + vst1q_u64(lmt_addr + 10, cmd1[1]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + + do { + vst1q_u64(lmt_addr, cmd0[2]); + vst1q_u64(lmt_addr + 2, cmd2[2]); + vst1q_u64(lmt_addr + 4, cmd1[2]); + vst1q_u64(lmt_addr + 6, cmd0[3]); + vst1q_u64(lmt_addr + 8, cmd2[3]); + vst1q_u64(lmt_addr + 10, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } else { + do { + vst1q_u64(lmt_addr, cmd0[0]); + vst1q_u64(lmt_addr + 2, cmd1[0]); + vst1q_u64(lmt_addr + 4, cmd0[1]); + vst1q_u64(lmt_addr + 6, cmd1[1]); + vst1q_u64(lmt_addr + 8, cmd0[2]); + vst1q_u64(lmt_addr + 10, cmd1[2]); + vst1q_u64(lmt_addr + 12, cmd0[3]); + vst1q_u64(lmt_addr + 14, cmd1[3]); + lmt_status = roc_lmt_submit_ldeor(io_addr); + } while (lmt_status == 0); + } tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP; } diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c index a6e7c9e542..5842facb58 100644 --- a/drivers/net/cnxk/cn9k_tx_vec.c +++ b/drivers/net/cnxk/cn9k_tx_vec.c @@ -14,8 +14,7 @@ uint64_t cmd[sz]; \ \ /* VLAN, TSTMP, TSO is not supported by vec */ \ - if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F || \ - (flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ + if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F || \ (flags) & NIX_TX_OFFLOAD_TSO_F) \ return 0; \ return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \