From 100f6992429e451206509db036ef5dbe79adedb1 Mon Sep 17 00:00:00 2001 From: Harman Kalra Date: Mon, 16 Mar 2020 15:03:44 +0530 Subject: [PATCH] net/octeontx: support Rx/Tx checksum offload This patch implements rx/tx checksum offload. In case of wrong checksum received (inner/outer l3/l4) it reports the corresponding layer which has bad checksum and also corrects it if hw checksum is enabled on tx side. Signed-off-by: Harman Kalra --- doc/guides/nics/features/octeontx.ini | 4 + drivers/net/octeontx/octeontx_ethdev.c | 22 ++- drivers/net/octeontx/octeontx_ethdev.h | 21 ++- drivers/net/octeontx/octeontx_rxtx.c | 12 +- drivers/net/octeontx/octeontx_rxtx.h | 194 +++++++++++++++++++++++-- 5 files changed, 228 insertions(+), 25 deletions(-) diff --git a/doc/guides/nics/features/octeontx.ini b/doc/guides/nics/features/octeontx.ini index 6049c1c432..8a95c216cd 100644 --- a/doc/guides/nics/features/octeontx.ini +++ b/doc/guides/nics/features/octeontx.ini @@ -17,6 +17,10 @@ Unicast MAC filter = Y VLAN filter = Y VLAN offload = P CRC offload = Y +L3 checksum offload = Y +L4 checksum offload = Y +Inner L3 checksum = Y +Inner L4 checksum = Y Packet type parsing = Y Flow control = Y Basic stats = Y diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c index 1918696834..ea3b278a15 100644 --- a/drivers/net/octeontx/octeontx_ethdev.c +++ b/drivers/net/octeontx/octeontx_ethdev.c @@ -370,6 +370,16 @@ octeontx_tx_offload_flags(struct rte_eth_dev *eth_dev) struct octeontx_nic *nic = octeontx_pmd_priv(eth_dev); uint16_t flags = 0; + if (nic->tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM || + nic->tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) + flags |= OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F; + + if (nic->tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM || + nic->tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM || + nic->tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM || + nic->tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) + flags |= OCCTX_TX_OFFLOAD_L3_L4_CSUM_F; + if (!(nic->tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) flags |= OCCTX_TX_OFFLOAD_MBUF_NOFF_F; @@ -383,13 +393,15 @@ static uint16_t octeontx_rx_offload_flags(struct rte_eth_dev *eth_dev) { struct octeontx_nic *nic = octeontx_pmd_priv(eth_dev); - struct rte_eth_dev_data *data = eth_dev->data; - struct rte_eth_conf *conf = &data->dev_conf; - struct rte_eth_rxmode *rxmode = &conf->rxmode; uint16_t flags = 0; - if (rxmode->mq_mode == ETH_MQ_RX_RSS) - flags |= OCCTX_RX_OFFLOAD_RSS_F; + if (nic->rx_offloads & (DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM)) + flags |= OCCTX_RX_OFFLOAD_CSUM_F; + + if (nic->rx_offloads & (DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)) + flags |= OCCTX_RX_OFFLOAD_CSUM_F; if (nic->rx_offloads & DEV_RX_OFFLOAD_SCATTER) { flags |= OCCTX_RX_MULTI_SEG_F; diff --git a/drivers/net/octeontx/octeontx_ethdev.h b/drivers/net/octeontx/octeontx_ethdev.h index dc53b53be0..7246fb6d1d 100644 --- a/drivers/net/octeontx/octeontx_ethdev.h +++ b/drivers/net/octeontx/octeontx_ethdev.h @@ -53,13 +53,24 @@ #define OCCTX_MAX_MTU (OCCTX_MAX_FRS - OCCTX_L2_OVERHEAD) -#define OCTEONTX_RX_OFFLOADS (DEV_RX_OFFLOAD_CHECKSUM | \ - DEV_RX_OFFLOAD_SCATTER | \ - DEV_RX_OFFLOAD_JUMBO_FRAME | \ +#define OCTEONTX_RX_OFFLOADS ( \ + DEV_RX_OFFLOAD_CHECKSUM | \ + DEV_RX_OFFLOAD_SCTP_CKSUM | \ + DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_SCATTER | \ + DEV_RX_OFFLOAD_SCATTER | \ + DEV_RX_OFFLOAD_JUMBO_FRAME | \ DEV_RX_OFFLOAD_VLAN_FILTER) -#define OCTEONTX_TX_OFFLOADS (DEV_TX_OFFLOAD_MT_LOCKFREE | \ - DEV_TX_OFFLOAD_MBUF_FAST_FREE | \ +#define OCTEONTX_TX_OFFLOADS ( \ + DEV_TX_OFFLOAD_MBUF_FAST_FREE | \ + DEV_TX_OFFLOAD_MT_LOCKFREE | \ + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_OUTER_UDP_CKSUM | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_SCTP_CKSUM | \ DEV_TX_OFFLOAD_MULTI_SEGS) static inline struct octeontx_nic * diff --git a/drivers/net/octeontx/octeontx_rxtx.c b/drivers/net/octeontx/octeontx_rxtx.c index 4cdc5a0f9b..bbe43a874b 100644 --- a/drivers/net/octeontx/octeontx_rxtx.c +++ b/drivers/net/octeontx/octeontx_rxtx.c @@ -41,8 +41,8 @@ octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) return count; /* return number of pkts received */ } -#define T(name, f1, f0, sz, flags) \ -static uint16_t __rte_noinline __rte_hot \ +#define T(name, f3, f2, f1, f0, sz, flags) \ +static uint16_t __rte_noinline __rte_hot \ octeontx_xmit_pkts_ ##name(void *tx_queue, \ struct rte_mbuf **tx_pkts, uint16_t pkts) \ { \ @@ -60,9 +60,9 @@ octeontx_set_tx_function(struct rte_eth_dev *dev) { struct octeontx_nic *nic = octeontx_pmd_priv(dev); - const eth_tx_burst_t tx_burst_func[2][2] = { -#define T(name, f1, f0, sz, flags) \ - [f1][f0] = octeontx_xmit_pkts_ ##name, + const eth_tx_burst_t tx_burst_func[2][2][2][2] = { +#define T(name, f3, f2, f1, f0, sz, flags) \ + [f3][f2][f1][f0] = octeontx_xmit_pkts_ ##name, OCCTX_TX_FASTPATH_MODES #undef T @@ -70,5 +70,7 @@ OCCTX_TX_FASTPATH_MODES dev->tx_pkt_burst = tx_burst_func [!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(nic->tx_offload_flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)] [!!(nic->tx_offload_flags & OCCTX_TX_MULTI_SEG_F)]; } diff --git a/drivers/net/octeontx/octeontx_rxtx.h b/drivers/net/octeontx/octeontx_rxtx.h index 35ba271385..5e607f1705 100644 --- a/drivers/net/octeontx/octeontx_rxtx.h +++ b/drivers/net/octeontx/octeontx_rxtx.h @@ -14,17 +14,65 @@ #define BIT(nr) (1UL << (nr)) #define OCCTX_RX_OFFLOAD_NONE (0) -#define OCCTX_RX_OFFLOAD_RSS_F BIT(0) -#define OCCTX_RX_VLAN_FLTR_F BIT(1) -#define OCCTX_RX_MULTI_SEG_F BIT(15) +#define OCCTX_RX_MULTI_SEG_F BIT(0) +#define OCCTX_RX_OFFLOAD_CSUM_F BIT(1) +#define OCCTX_RX_VLAN_FLTR_F BIT(2) #define OCCTX_TX_OFFLOAD_NONE (0) +#define OCCTX_TX_MULTI_SEG_F BIT(0) +#define OCCTX_TX_OFFLOAD_L3_L4_CSUM_F BIT(1) +#define OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(2) #define OCCTX_TX_OFFLOAD_MBUF_NOFF_F BIT(3) -#define OCCTX_TX_MULTI_SEG_F BIT(15) /* Packet type table */ #define PTYPE_SIZE OCCTX_PKI_LTYPE_LAST +/* octeontx send header sub descriptor structure */ +RTE_STD_C11 +union octeontx_send_hdr_w0_u { + uint64_t u; + struct { + uint64_t total : 16; + uint64_t markptr : 8; + uint64_t l3ptr : 8; + uint64_t l4ptr : 8; + uint64_t ii : 1; + uint64_t shp_dis : 1; + uint64_t ckle : 1; + uint64_t cklf : 2; + uint64_t ckl3 : 1; + uint64_t ckl4 : 2; + uint64_t p : 1; + uint64_t format : 7; + uint64_t tstamp : 1; + uint64_t tso_eom : 1; + uint64_t df : 1; + uint64_t tso : 1; + uint64_t n2 : 1; + uint64_t scntn1 : 3; + }; +}; + +RTE_STD_C11 +union octeontx_send_hdr_w1_u { + uint64_t u; + struct { + uint64_t tso_mss : 14; + uint64_t shp_ra : 2; + uint64_t tso_sb : 8; + uint64_t leptr : 8; + uint64_t lfptr : 8; + uint64_t shp_chg : 9; + uint64_t tso_fn : 7; + uint64_t l2len : 8; + }; +}; + +struct octeontx_send_hdr_s { + union octeontx_send_hdr_w0_u w0; + union octeontx_send_hdr_w1_u w1; +}; + static const uint32_t __rte_cache_aligned ptype_table[PTYPE_SIZE][PTYPE_SIZE][PTYPE_SIZE] = { [LC_NONE][LE_NONE][LF_NONE] = RTE_PTYPE_UNKNOWN, @@ -178,6 +226,90 @@ octeontx_prefree_seg(struct rte_mbuf *m) return 1; } +static __rte_always_inline void +octeontx_tx_checksum_offload(uint64_t *cmd_buf, const uint16_t flags, + struct rte_mbuf *m) +{ + struct octeontx_send_hdr_s *send_hdr = + (struct octeontx_send_hdr_s *)cmd_buf; + uint64_t ol_flags = m->ol_flags; + + /* PKO Checksum L4 Algorithm Enumeration + * 0x0 - No checksum + * 0x1 - UDP L4 checksum + * 0x2 - TCP L4 checksum + * 0x3 - SCTP L4 checksum + */ + const uint8_t csum = (!(((ol_flags ^ PKT_TX_UDP_CKSUM) >> 52) & 0x3) + + (!(((ol_flags ^ PKT_TX_TCP_CKSUM) >> 52) & 0x3) * 2) + + (!(((ol_flags ^ PKT_TX_SCTP_CKSUM) >> 52) & 0x3) * 3)); + + const uint8_t is_tunnel_parsed = (!!(ol_flags & PKT_TX_TUNNEL_GTP) || + !!(ol_flags & PKT_TX_TUNNEL_VXLAN_GPE) || + !!(ol_flags & PKT_TX_TUNNEL_VXLAN) || + !!(ol_flags & PKT_TX_TUNNEL_GRE) || + !!(ol_flags & PKT_TX_TUNNEL_GENEVE) || + !!(ol_flags & PKT_TX_TUNNEL_IP) || + !!(ol_flags & PKT_TX_TUNNEL_IPIP)); + + const uint8_t csum_outer = (!!(ol_flags & PKT_TX_OUTER_UDP_CKSUM) || + !!(ol_flags & PKT_TX_TUNNEL_UDP)); + const uint8_t outer_l2_len = m->outer_l2_len; + const uint8_t l2_len = m->l2_len; + + if ((flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) && + (flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)) { + if (is_tunnel_parsed) { + /* Outer L3 */ + send_hdr->w0.l3ptr = outer_l2_len; + send_hdr->w0.l4ptr = outer_l2_len + m->outer_l3_len; + /* Set clk3 for PKO to calculate IPV4 header checksum */ + send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_OUTER_IPV4); + + /* Outer L4 */ + send_hdr->w0.ckl4 = csum_outer; + + /* Inner L3 */ + send_hdr->w1.leptr = send_hdr->w0.l4ptr + l2_len; + send_hdr->w1.lfptr = send_hdr->w1.leptr + m->l3_len; + /* Set clke for PKO to calculate inner IPV4 header + * checksum. + */ + send_hdr->w0.ckle = !!(ol_flags & PKT_TX_IPV4); + + /* Inner L4 */ + send_hdr->w0.cklf = csum; + } else { + /* Inner L3 */ + send_hdr->w0.l3ptr = l2_len; + send_hdr->w0.l4ptr = l2_len + m->l3_len; + /* Set clk3 for PKO to calculate IPV4 header checksum */ + send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_IPV4); + + /* Inner L4 */ + send_hdr->w0.ckl4 = csum; + } + } else if (flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) { + /* Outer L3 */ + send_hdr->w0.l3ptr = outer_l2_len; + send_hdr->w0.l4ptr = outer_l2_len + m->outer_l3_len; + /* Set clk3 for PKO to calculate IPV4 header checksum */ + send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_OUTER_IPV4); + + /* Outer L4 */ + send_hdr->w0.ckl4 = csum_outer; + } else if (flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F) { + /* Inner L3 */ + send_hdr->w0.l3ptr = l2_len; + send_hdr->w0.l4ptr = l2_len + m->l3_len; + /* Set clk3 for PKO to calculate IPV4 header checksum */ + send_hdr->w0.ckl3 = !!(ol_flags & PKT_TX_IPV4); + + /* Inner L4 */ + send_hdr->w0.ckl4 = csum; + } +} + static __rte_always_inline uint16_t __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, const uint16_t flag) @@ -188,6 +320,11 @@ __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, cmd_buf[nb_desc++] = tx_pkt->data_len & 0xffff; cmd_buf[nb_desc++] = 0x0; + /* Enable tx checksum offload */ + if ((flag & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) || + (flag & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)) + octeontx_tx_checksum_offload(cmd_buf, flag, tx_pkt); + /* SEND_HDR[DF] bit controls if buffer is to be freed or * not, as SG_DESC[I] and SEND_HDR[II] are clear. */ @@ -226,6 +363,11 @@ __octeontx_xmit_mseg_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, cmd_buf[nb_desc++] = tx_pkt->pkt_len & 0xffff; cmd_buf[nb_desc++] = 0x0; + /* Enable tx checksum offload */ + if ((flag & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F) || + (flag & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)) + octeontx_tx_checksum_offload(cmd_buf, flag, tx_pkt); + do { m_next = tx_pkt->next; /* To handle case where mbufs belong to diff pools, like @@ -301,13 +443,45 @@ __octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +#define L3L4CSUM_F OCCTX_TX_OFFLOAD_L3_L4_CSUM_F +#define OL3OL4CSUM_F OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F #define NOFF_F OCCTX_TX_OFFLOAD_MBUF_NOFF_F #define MULT_F OCCTX_TX_MULTI_SEG_F -/* [NOFF] [MULTI_SEG] */ -#define OCCTX_TX_FASTPATH_MODES \ -T(no_offload, 0, 0, 4, OCCTX_TX_OFFLOAD_NONE) \ -T(mseg, 0, 1, 14, MULT_F) \ -T(noff, 1, 0, 4, NOFF_F) \ -T(noff_mseg, 1, 1, 14, NOFF_F | MULT_F) + +/* [L3L4CSUM_F] [OL3OL4CSUM_F] [NOFF] [MULTI_SEG] */ +#define OCCTX_TX_FASTPATH_MODES \ +T(no_offload, 0, 0, 0, 0, 4, \ + OCCTX_TX_OFFLOAD_NONE) \ +T(mseg, 0, 0, 0, 1, 14, \ + MULT_F) \ +T(l3l4csum, 0, 0, 1, 0, 4, \ + L3L4CSUM_F) \ +T(l3l4csum_mseg, 0, 0, 1, 1, 14, \ + L3L4CSUM_F | MULT_F) \ +T(ol3ol4csum, 0, 1, 0, 0, 4, \ + OL3OL4CSUM_F) \ +T(ol3l4csum_mseg, 0, 1, 0, 1, 14, \ + OL3OL4CSUM_F | MULT_F) \ +T(ol3l4csum_l3l4csum, 0, 1, 1, 0, 4, \ + OL3OL4CSUM_F | L3L4CSUM_F) \ +T(ol3l4csum_l3l4csum_mseg, 0, 1, 1, 1, 14, \ + OL3OL4CSUM_F | L3L4CSUM_F | MULT_F) \ +T(noff, 1, 0, 0, 0, 4, \ + NOFF_F) \ +T(noff_mseg, 1, 0, 0, 1, 14, \ + NOFF_F | MULT_F) \ +T(noff_l3l4csum, 1, 0, 1, 0, 4, \ + NOFF_F | L3L4CSUM_F) \ +T(noff_l3l4csum_mseg, 1, 0, 1, 1, 14, \ + NOFF_F | L3L4CSUM_F | MULT_F) \ +T(noff_ol3ol4csum, 1, 1, 0, 0, 4, \ + NOFF_F | OL3OL4CSUM_F) \ +T(noff_ol3ol4csum_mseg, 1, 1, 0, 1, 14, \ + NOFF_F | OL3OL4CSUM_F | MULT_F) \ +T(noff_ol3ol4csum_l3l4csum, 1, 1, 1, 0, 4, \ + NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F) \ +T(noff_ol3ol4csum_l3l4csum_mseg, 1, 1, 1, 1, 14, \ + NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F | \ + MULT_F) #endif /* __OCTEONTX_RXTX_H__ */ -- 2.20.1