X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fcnxk%2Fcn10k_rx.h;h=5ecb20f038293676b66d9309ab546e19c59759ef;hb=6baa15684c5a2ea3f4d7a6f4cfc7f30d86a51fea;hp=abf280102be33b2e8f21f96e5f450ad30c65635d;hpb=68c48ab31875299e17c963db1b3a56c3ca4cd3c6;p=dpdk.git diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index abf280102b..5ecb20f038 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -23,6 +23,7 @@ * Defining it from backwards to denote its been * not used as offload flags to pick function */ +#define NIX_RX_REAS_F BIT(12) #define NIX_RX_VWQE_F BIT(13) #define NIX_RX_MULTI_SEG_F BIT(14) #define CPT_RX_WQE_F BIT(15) @@ -36,6 +37,50 @@ (((f) & NIX_RX_VWQE_F) ? \ (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \ (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o))) +#define CQE_PTR_DIFF(b, i, o, f) \ + (((f) & NIX_RX_VWQE_F) ? \ + (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) - (o)) : \ + (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) - (o))) + +#define NIX_RX_SEC_UCC_CONST \ + ((RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1) << 8 | \ + ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) \ + << 24 | \ + ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1) \ + << 32 | \ + ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) \ + << 40 | \ + ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) \ + << 48 | \ + (RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1) << 56) + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +static inline void +nix_mbuf_validate_next(struct rte_mbuf *m) +{ + if (m->nb_segs == 1 && m->next) { + rte_panic("mbuf->next[%p] valid when mbuf->nb_segs is %d", + m->next, m->nb_segs); + } +} +#else +static inline void +nix_mbuf_validate_next(struct rte_mbuf *m) +{ + RTE_SET_USED(m); +} +#endif + +#define NIX_RX_SEC_REASSEMBLY_F \ + (NIX_RX_REAS_F | NIX_RX_OFFLOAD_SECURITY_F) + +static inline rte_eth_ip_reassembly_dynfield_t * +cnxk_ip_reassembly_dynfield(struct rte_mbuf *mbuf, + int ip_reassembly_dynfield_offset) +{ + return RTE_MBUF_DYNFIELD(mbuf, ip_reassembly_dynfield_offset, + rte_eth_ip_reassembly_dynfield_t *); +} union mbuf_initializer { struct { @@ -104,22 +149,299 @@ nix_sec_flush_meta(uintptr_t laddr, uint16_t lmt_id, uint8_t loff, roc_lmt_submit_steorl(lmt_id, pa); } +static struct rte_mbuf * +nix_sec_attach_frags(const struct cpt_parse_hdr_s *hdr, + struct cn10k_inb_priv_data *inb_priv, + const uint64_t mbuf_init) +{ + struct rte_mbuf *head, *mbuf, *mbuf_prev; + uint32_t offset = hdr->w2.fi_offset; + union nix_rx_parse_u *frag_rx; + struct cpt_frag_info_s *finfo; + uint64_t *frag_ptr = NULL; + uint64_t ol_flags; + uint16_t frag_size; + uint16_t rlen; + uint64_t *wqe; + int off; + + off = inb_priv->reass_dynfield_off; + ol_flags = BIT_ULL(inb_priv->reass_dynflag_bit); + ol_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD; + + /* offset of 0 implies 256B, otherwise it implies offset*8B */ + offset = (((offset - 1) & 0x1f) + 1) * 8; + finfo = RTE_PTR_ADD(hdr, offset + hdr->w2.fi_pad); + + /* Frag-0: */ + wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->wqe_ptr)); + rlen = ((*(wqe + 10)) >> 16) & 0xFFFF; + + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + frag_size = rlen + frag_rx->lcptr - frag_rx->laptr; + frag_rx->pkt_lenm1 = frag_size - 1; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - sizeof(struct rte_mbuf)); + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init; + mbuf->data_len = frag_size; + mbuf->pkt_len = frag_size; + mbuf->ol_flags = ol_flags; + mbuf->next = NULL; + head = mbuf; + mbuf_prev = mbuf; + /* Update 
dynamic field with userdata */ + *rte_security_dynfield(mbuf) = (uint64_t)inb_priv->userdata; + + cnxk_ip_reassembly_dynfield(head, off)->nb_frags = hdr->w0.num_frags - 1; + cnxk_ip_reassembly_dynfield(head, off)->next_frag = NULL; + + /* Frag-1: */ + if (hdr->w0.num_frags > 1) { + wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->frag1_wqe_ptr)); + rlen = ((*(wqe + 10)) >> 16) & 0xFFFF; + + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + frag_size = rlen + frag_rx->lcptr - frag_rx->laptr; + frag_rx->pkt_lenm1 = frag_size - 1; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - + sizeof(struct rte_mbuf)); + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init; + mbuf->data_len = frag_size; + mbuf->pkt_len = frag_size; + mbuf->ol_flags = ol_flags; + mbuf->next = NULL; + + /* Update dynamic field with userdata */ + *rte_security_dynfield(mbuf) = (uint64_t)inb_priv->userdata; + + cnxk_ip_reassembly_dynfield(mbuf, off)->nb_frags = + hdr->w0.num_frags - 2; + cnxk_ip_reassembly_dynfield(mbuf, off)->next_frag = NULL; + cnxk_ip_reassembly_dynfield(mbuf_prev, off)->next_frag = mbuf; + mbuf_prev = mbuf; + } + + /* Frag-2: */ + if (hdr->w0.num_frags > 2) { + frag_ptr = (uint64_t *)(finfo + 1); + wqe = (uint64_t *)(rte_be_to_cpu_64(*frag_ptr)); + rlen = ((*(wqe + 10)) >> 16) & 0xFFFF; + + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + frag_size = rlen + frag_rx->lcptr - frag_rx->laptr; + frag_rx->pkt_lenm1 = frag_size - 1; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - + sizeof(struct rte_mbuf)); + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init; + mbuf->data_len = frag_size; + mbuf->pkt_len = frag_size; + mbuf->ol_flags = ol_flags; + mbuf->next = NULL; + + /* Update dynamic field with userdata */ + *rte_security_dynfield(mbuf) = (uint64_t)inb_priv->userdata; + + cnxk_ip_reassembly_dynfield(mbuf, off)->nb_frags = + hdr->w0.num_frags - 3; + cnxk_ip_reassembly_dynfield(mbuf, off)->next_frag = NULL; + cnxk_ip_reassembly_dynfield(mbuf_prev, off)->next_frag = mbuf; + mbuf_prev = mbuf; + } + + /* Frag-3: */ + if (hdr->w0.num_frags > 3) { + wqe = (uint64_t *)(rte_be_to_cpu_64(*(frag_ptr + 1))); + rlen = ((*(wqe + 10)) >> 16) & 0xFFFF; + + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + frag_size = rlen + frag_rx->lcptr - frag_rx->laptr; + frag_rx->pkt_lenm1 = frag_size - 1; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - + sizeof(struct rte_mbuf)); + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init; + mbuf->data_len = frag_size; + mbuf->pkt_len = frag_size; + mbuf->ol_flags = ol_flags; + mbuf->next = NULL; + + /* Update dynamic field with userdata */ + *rte_security_dynfield(mbuf) = (uint64_t)inb_priv->userdata; + + cnxk_ip_reassembly_dynfield(mbuf, off)->nb_frags = + hdr->w0.num_frags - 4; + cnxk_ip_reassembly_dynfield(mbuf, off)->next_frag = NULL; + cnxk_ip_reassembly_dynfield(mbuf_prev, off)->next_frag = mbuf; + } + return head; +} + +static struct rte_mbuf * +nix_sec_reassemble_frags(const struct cpt_parse_hdr_s *hdr, uint64_t cq_w1, + uint64_t cq_w5, uint64_t mbuf_init) +{ + uint32_t fragx_sum, pkt_hdr_len, l3_hdr_size; + uint32_t offset = hdr->w2.fi_offset; + union nix_rx_parse_u *inner_rx; + uint16_t rlen, data_off, b_off; + union nix_rx_parse_u *frag_rx; + struct cpt_frag_info_s *finfo; + struct rte_mbuf *head, *mbuf; + uint64_t *frag_ptr = NULL; + rte_iova_t *inner_iova; + uint16_t frag_size; + uint64_t *wqe; + + /* Base data offset */ + b_off = mbuf_init & 0xFFFFUL; + mbuf_init &= ~0xFFFFUL; + + /* offset of 0 implies 256B, otherwise it implies offset*8B */ + offset = (((offset - 1) & 0x1f) + 1) * 8; + finfo = RTE_PTR_ADD(hdr, 
offset + hdr->w2.fi_pad); + + /* Frag-0: */ + wqe = (uint64_t *)rte_be_to_cpu_64(hdr->wqe_ptr); + inner_rx = (union nix_rx_parse_u *)(wqe + 1); + inner_iova = (rte_iova_t *)*(wqe + 9); + + /* Update only the upper 28-bits from meta pkt parse info */ + *((uint64_t *)inner_rx) = ((*((uint64_t *)inner_rx) & ((1ULL << 36) - 1)) | + (cq_w1 & ~((1ULL << 36) - 1))); + + rlen = ((*(wqe + 10)) >> 16) & 0xFFFF; + frag_size = rlen + ((cq_w5 >> 16) & 0xFF) - (cq_w5 & 0xFF); + fragx_sum = rte_be_to_cpu_16(finfo->w1.frag_size0); + pkt_hdr_len = frag_size - fragx_sum; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - sizeof(struct rte_mbuf)); + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init | b_off; + mbuf->data_len = frag_size; + head = mbuf; + + if (inner_rx->lctype == NPC_LT_LC_IP) { + struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *) + RTE_PTR_ADD(inner_iova, inner_rx->lcptr); + + l3_hdr_size = (hdr->version_ihl & 0xf) << 2; + } else { + struct rte_ipv6_hdr *hdr = (struct rte_ipv6_hdr *) + RTE_PTR_ADD(inner_iova, inner_rx->lcptr); + size_t ext_len = sizeof(struct rte_ipv6_hdr); + uint8_t *nxt_hdr = (uint8_t *)hdr; + int nh = hdr->proto; + + l3_hdr_size = 0; + while (nh != -EINVAL) { + nxt_hdr += ext_len; + l3_hdr_size += ext_len; + nh = rte_ipv6_get_next_ext(nxt_hdr, nh, &ext_len); + } + } + + /* Frag-1: */ + wqe = (uint64_t *)(rte_be_to_cpu_64(hdr->frag1_wqe_ptr)); + frag_size = rte_be_to_cpu_16(finfo->w1.frag_size1); + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + + mbuf->next = (struct rte_mbuf *)((uintptr_t)wqe - sizeof(struct rte_mbuf)); + mbuf = mbuf->next; + data_off = b_off + frag_rx->lcptr + l3_hdr_size; + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init | data_off; + mbuf->data_len = frag_size; + fragx_sum += frag_size; + + /* Mark frag as get */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + + /* Frag-2: */ + if (hdr->w0.num_frags > 2) { + frag_ptr = (uint64_t *)(finfo + 1); + wqe = (uint64_t *)(rte_be_to_cpu_64(*frag_ptr)); + frag_size = rte_be_to_cpu_16(finfo->w1.frag_size2); + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + + mbuf->next = (struct rte_mbuf *)((uintptr_t)wqe - sizeof(struct rte_mbuf)); + mbuf = mbuf->next; + data_off = b_off + frag_rx->lcptr + l3_hdr_size; + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init | data_off; + mbuf->data_len = frag_size; + fragx_sum += frag_size; + + /* Mark frag as get */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + } + + /* Frag-3: */ + if (hdr->w0.num_frags > 3) { + wqe = (uint64_t *)(rte_be_to_cpu_64(*(frag_ptr + 1))); + frag_size = rte_be_to_cpu_16(finfo->w1.frag_size3); + frag_rx = (union nix_rx_parse_u *)(wqe + 1); + + mbuf->next = (struct rte_mbuf *)((uintptr_t)wqe - sizeof(struct rte_mbuf)); + mbuf = mbuf->next; + data_off = b_off + frag_rx->lcptr + l3_hdr_size; + *(uint64_t *)(&mbuf->rearm_data) = mbuf_init | data_off; + mbuf->data_len = frag_size; + fragx_sum += frag_size; + + /* Mark frag as get */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + } + + if (inner_rx->lctype == NPC_LT_LC_IP) { + struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *) + RTE_PTR_ADD(inner_iova, inner_rx->lcptr); + + hdr->fragment_offset = 0; + hdr->total_length = rte_cpu_to_be_16(fragx_sum + l3_hdr_size); + hdr->hdr_checksum = 0; + hdr->hdr_checksum = rte_ipv4_cksum(hdr); + + inner_rx->pkt_lenm1 = pkt_hdr_len + fragx_sum - 1; + } else { + /* Remove the frag header by moving header 8 bytes forward */ + struct rte_ipv6_hdr *hdr = (struct rte_ipv6_hdr *) + RTE_PTR_ADD(inner_iova, inner_rx->lcptr); + + hdr->payload_len = 
rte_cpu_to_be_16(fragx_sum + l3_hdr_size - + 8 - sizeof(struct rte_ipv6_hdr)); + + rte_memcpy(rte_pktmbuf_mtod_offset(head, void *, 8), + rte_pktmbuf_mtod(head, void *), + inner_rx->lcptr + sizeof(struct rte_ipv6_hdr)); + + inner_rx->pkt_lenm1 = pkt_hdr_len + fragx_sum - 8 - 1; + head->data_len -= 8; + head->data_off += 8; + } + mbuf->next = NULL; + head->pkt_len = inner_rx->pkt_lenm1 + 1; + head->nb_segs = hdr->w0.num_frags; + + return head; +} + static __rte_always_inline struct rte_mbuf * -nix_sec_meta_to_mbuf_sc(uint64_t cq_w1, const uint64_t sa_base, uintptr_t laddr, - uint8_t *loff, struct rte_mbuf *mbuf, uint16_t data_off) +nix_sec_meta_to_mbuf_sc(uint64_t cq_w1, uint64_t cq_w5, const uint64_t sa_base, + uintptr_t laddr, uint8_t *loff, struct rte_mbuf *mbuf, + uint16_t data_off, const uint16_t flags, + const uint64_t mbuf_init) { const void *__p = (void *)((uintptr_t)mbuf + (uint16_t)data_off); const struct cpt_parse_hdr_s *hdr = (const struct cpt_parse_hdr_s *)__p; struct cn10k_inb_priv_data *inb_priv; - struct rte_mbuf *inner; + struct rte_mbuf *inner = NULL; uint32_t sa_idx; + uint16_t ucc; + uint32_t len; + uintptr_t ip; void *inb_sa; uint64_t w0; - if (cq_w1 & BIT(11)) { - inner = (struct rte_mbuf *)(rte_be_to_cpu_64(hdr->wqe_ptr) - - sizeof(struct rte_mbuf)); - + if ((flags & NIX_RX_REAS_F) && (cq_w1 & BIT(11))) { /* Get SPI from CPT_PARSE_S's cookie(already swapped) */ w0 = hdr->w0.u64; sa_idx = w0 >> 32; @@ -127,12 +449,56 @@ nix_sec_meta_to_mbuf_sc(uint64_t cq_w1, const uint64_t sa_base, uintptr_t laddr, inb_sa = roc_nix_inl_ot_ipsec_inb_sa(sa_base, sa_idx); inb_priv = roc_nix_inl_ot_ipsec_inb_sa_sw_rsvd(inb_sa); - /* Update dynamic field with userdata */ - *rte_security_dynfield(inner) = (uint64_t)inb_priv->userdata; + if (!hdr->w0.num_frags) { + /* No Reassembly or inbound error */ + inner = (struct rte_mbuf *) + (rte_be_to_cpu_64(hdr->wqe_ptr) - + sizeof(struct rte_mbuf)); + + /* Update dynamic field with userdata */ + *rte_security_dynfield(inner) = + (uint64_t)inb_priv->userdata; + + /* Get ucc from cpt parse header */ + ucc = hdr->w3.hw_ccode; - /* Update l2 hdr length first */ - inner->pkt_len = (hdr->w2.il3_off - - sizeof(struct cpt_parse_hdr_s) - (w0 & 0x7)); + /* Calculate inner packet length as + * IP total len + l2 len + */ + ip = (uintptr_t)hdr + ((cq_w5 >> 16) & 0xFF); + ip += ((cq_w1 >> 40) & 0x6); + len = rte_be_to_cpu_16(*(uint16_t *)ip); + len += ((cq_w5 >> 16) & 0xFF) - (cq_w5 & 0xFF); + len += (cq_w1 & BIT(42)) ? 40 : 0; + + inner->pkt_len = len; + inner->data_len = len; + *(uint64_t *)(&inner->rearm_data) = mbuf_init; + + inner->ol_flags = ((ucc == CPT_COMP_WARN) ? + RTE_MBUF_F_RX_SEC_OFFLOAD : + (RTE_MBUF_F_RX_SEC_OFFLOAD | + RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED)); + + ucc = hdr->w3.uc_ccode; + inner->ol_flags |= ((ucc & 0xF0) == 0xF0) ? + ((NIX_RX_SEC_UCC_CONST >> ((ucc & 0xF) << 3)) + & 0xFF) << 1 : 0; + } else if (!(hdr->w0.err_sum) && !(hdr->w0.reas_sts)) { + /* Reassembly success */ + inner = nix_sec_reassemble_frags(hdr, cq_w1, cq_w5, + mbuf_init); + + /* Update dynamic field with userdata */ + *rte_security_dynfield(inner) = + (uint64_t)inb_priv->userdata; + + /* Assume success */ + inner->ol_flags = RTE_MBUF_F_RX_SEC_OFFLOAD; + } else { + /* Reassembly failure */ + inner = nix_sec_attach_frags(hdr, inb_priv, mbuf_init); + } /* Store meta in lmtline to free * Assume all meta's from same aura. 
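/*
 * Illustrative sketch (not part of the patch): how an application might walk
 * the fragment chain that nix_sec_attach_frags() builds when inline
 * reassembly fails. The IP reassembly dynfield offset and the
 * incomplete-reassembly dynflag mask are assumed to have been obtained at
 * configure time (their registration is outside this diff) and are simply
 * passed in here.
 */
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>
#include <rte_ethdev.h>

static inline unsigned int
example_count_attached_frags(struct rte_mbuf *head, int dynfield_off,
			     uint64_t incomplete_dynflag_mask)
{
	rte_eth_ip_reassembly_dynfield_t *dyn;
	unsigned int nb = 0;

	/* nix_sec_attach_frags() sets this dynflag on partially reassembled pkts */
	if (!(head->ol_flags & incomplete_dynflag_mask))
		return 1; /* fully reassembled or plain packet: single mbuf */

	while (head != NULL) {
		nb++;
		dyn = RTE_MBUF_DYNFIELD(head, dynfield_off,
					rte_eth_ip_reassembly_dynfield_t *);
		/* next_frag links the fragments; the last one has next_frag == NULL */
		head = dyn->next_frag;
	}
	return nb;
}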
@@ -147,30 +513,10 @@ nix_sec_meta_to_mbuf_sc(uint64_t cq_w1, const uint64_t sa_base, uintptr_t laddr, RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&inner, 1, 1); return inner; - } - return mbuf; -} - -#if defined(RTE_ARCH_ARM64) - -static __rte_always_inline struct rte_mbuf * -nix_sec_meta_to_mbuf(uint64_t cq_w1, uintptr_t sa_base, uintptr_t laddr, - uint8_t *loff, struct rte_mbuf *mbuf, uint16_t data_off, - uint8x16_t *rx_desc_field1, uint64_t *ol_flags) -{ - const void *__p = (void *)((uintptr_t)mbuf + (uint16_t)data_off); - const struct cpt_parse_hdr_s *hdr = (const struct cpt_parse_hdr_s *)__p; - struct cn10k_inb_priv_data *inb_priv; - struct rte_mbuf *inner; - uint64_t *sg, res_w1; - uint32_t sa_idx; - void *inb_sa; - uint16_t len; - uint64_t w0; - - if (cq_w1 & BIT(11)) { + } else if (cq_w1 & BIT(11)) { inner = (struct rte_mbuf *)(rte_be_to_cpu_64(hdr->wqe_ptr) - sizeof(struct rte_mbuf)); + /* Get SPI from CPT_PARSE_S's cookie(already swapped) */ w0 = hdr->w0.u64; sa_idx = w0 >> 32; @@ -181,23 +527,29 @@ nix_sec_meta_to_mbuf(uint64_t cq_w1, uintptr_t sa_base, uintptr_t laddr, /* Update dynamic field with userdata */ *rte_security_dynfield(inner) = (uint64_t)inb_priv->userdata; - /* CPT result(struct cpt_cn10k_res_s) is at - * after first IOVA in meta - */ - sg = (uint64_t *)(inner + 1); - res_w1 = sg[10]; - - /* Clear checksum flags and update security flag */ - *ol_flags &= ~(RTE_MBUF_F_RX_L4_CKSUM_MASK | RTE_MBUF_F_RX_IP_CKSUM_MASK); - *ol_flags |= (((res_w1 & 0xFF) == CPT_COMP_WARN) ? - RTE_MBUF_F_RX_SEC_OFFLOAD : - (RTE_MBUF_F_RX_SEC_OFFLOAD | RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED)); - /* Calculate inner packet length */ - len = ((res_w1 >> 16) & 0xFFFF) + hdr->w2.il3_off - - sizeof(struct cpt_parse_hdr_s) - (w0 & 0x7); - /* Update pkt_len and data_len */ - *rx_desc_field1 = vsetq_lane_u16(len, *rx_desc_field1, 2); - *rx_desc_field1 = vsetq_lane_u16(len, *rx_desc_field1, 4); + /* Get ucc from cpt parse header */ + ucc = hdr->w3.hw_ccode; + + /* Calculate inner packet length as IP total len + l2 len */ + ip = (uintptr_t)hdr + ((cq_w5 >> 16) & 0xFF); + ip += ((cq_w1 >> 40) & 0x6); + len = rte_be_to_cpu_16(*(uint16_t *)ip); + len += ((cq_w5 >> 16) & 0xFF) - (cq_w5 & 0xFF); + len += (cq_w1 & BIT(42)) ? 40 : 0; + + inner->pkt_len = len; + inner->data_len = len; + *(uint64_t *)(&inner->rearm_data) = mbuf_init; + + inner->ol_flags = ((ucc == CPT_COMP_WARN) ? + RTE_MBUF_F_RX_SEC_OFFLOAD : + (RTE_MBUF_F_RX_SEC_OFFLOAD | + RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED)); + + ucc = hdr->w3.uc_ccode; + inner->ol_flags |= ((ucc & 0xF0) == 0xF0) ? + ((NIX_RX_SEC_UCC_CONST >> ((ucc & 0xF) << 3)) + & 0xFF) << 1 : 0; /* Store meta in lmtline to free * Assume all meta's from same aura. 
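/*
 * Illustrative sketch (not part of the patch): scalar form of the
 * NIX_RX_SEC_UCC_CONST lookup used above. For micro-code completion codes in
 * the 0xF0-0xF7 "success" range, the low nibble selects one byte of the
 * 64-bit constant; that byte is the wanted RTE_MBUF_F_RX_*_CKSUM_*
 * combination pre-shifted right by one, so it is shifted back before being
 * OR-ed into ol_flags.
 */
#include <stdint.h>

static inline uint64_t
example_ucc_to_cksum_flags(uint8_t uc_ccode, uint64_t ucc_const)
{
	/* Only codes 0xF0-0xF7 map onto bytes of the constant */
	if ((uc_ccode & 0xF8) != 0xF0)
		return 0;

	/* Select byte (uc_ccode & 0xF) and restore the flag bit positions */
	return ((ucc_const >> ((uc_ccode & 0xF) << 3)) & 0xFF) << 1;
}
/*
 * e.g. example_ucc_to_cksum_flags(0xF7, NIX_RX_SEC_UCC_CONST) yields
 * RTE_MBUF_F_RX_IP_CKSUM_GOOD, matching byte 7 of the constant.
 */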
@@ -211,13 +563,68 @@ nix_sec_meta_to_mbuf(uint64_t cq_w1, uintptr_t sa_base, uintptr_t laddr, /* Mark inner mbuf as get */ RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&inner, 1, 1); - /* Return inner mbuf */ return inner; } - /* Return same mbuf as it is not a decrypted pkt */ return mbuf; } + +#if defined(RTE_ARCH_ARM64) + +static __rte_always_inline void +nix_sec_meta_to_mbuf(uint64_t cq_w1, uint64_t cq_w5, uintptr_t inb_sa, + uintptr_t cpth, struct rte_mbuf *inner, + uint8x16_t *rx_desc_field1, uint64_t *ol_flags, + const uint16_t flags, uint64x2_t *rearm) +{ + const struct cpt_parse_hdr_s *hdr = + (const struct cpt_parse_hdr_s *)cpth; + uint64_t mbuf_init = vgetq_lane_u64(*rearm, 0); + struct cn10k_inb_priv_data *inb_priv; + + /* Clear checksum flags */ + *ol_flags &= ~(RTE_MBUF_F_RX_L4_CKSUM_MASK | + RTE_MBUF_F_RX_IP_CKSUM_MASK); + + /* Get SPI from CPT_PARSE_S's cookie(already swapped) */ + inb_priv = roc_nix_inl_ot_ipsec_inb_sa_sw_rsvd((void *)inb_sa); + + /* Update dynamic field with userdata */ + *rte_security_dynfield(inner) = (uint64_t)inb_priv->userdata; + + /* Mark inner mbuf as get */ + RTE_MEMPOOL_CHECK_COOKIES(inner->pool, (void **)&inner, 1, 1); + + if (flags & NIX_RX_REAS_F && hdr->w0.num_frags) { + if (!(hdr->w0.err_sum) && !(hdr->w0.reas_sts)) { + /* Reassembly success */ + nix_sec_reassemble_frags(hdr, cq_w1, cq_w5, mbuf_init); + + /* Assume success */ + *ol_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD; + + /* Update pkt_len and data_len */ + *rx_desc_field1 = vsetq_lane_u16(inner->pkt_len, + *rx_desc_field1, 2); + *rx_desc_field1 = vsetq_lane_u16(inner->data_len, + *rx_desc_field1, 4); + + /* Data offset might be updated */ + mbuf_init = *(uint64_t *)(&inner->rearm_data); + *rearm = vsetq_lane_u64(mbuf_init, *rearm, 0); + } else { + /* Reassembly failure */ + nix_sec_attach_frags(hdr, inb_priv, mbuf_init); + *ol_flags |= inner->ol_flags; + + /* Update pkt_len and data_len */ + *rx_desc_field1 = vsetq_lane_u16(inner->pkt_len, + *rx_desc_field1, 2); + *rx_desc_field1 = vsetq_lane_u16(inner->data_len, + *rx_desc_field1, 4); + } + } +} #endif static __rte_always_inline uint32_t @@ -283,7 +690,7 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, sg = *(const uint64_t *)(rx + 1); nb_segs = (sg >> 48) & 0x3; - if (nb_segs == 1) { + if (nb_segs == 1 && !(flags & NIX_RX_SEC_REASSEMBLY_F)) { mbuf->next = NULL; return; } @@ -346,30 +753,10 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, ol_flags |= RTE_MBUF_F_RX_RSS_HASH; } - /* Process Security packets */ - if (flag & NIX_RX_OFFLOAD_SECURITY_F) { - if (w1 & BIT(11)) { - /* CPT result(struct cpt_cn10k_res_s) is at - * after first IOVA in meta - */ - const uint64_t *sg = (const uint64_t *)(mbuf + 1); - const uint64_t res_w1 = sg[10]; - const uint16_t uc_cc = res_w1 & 0xFF; - - /* Rlen */ - len = ((res_w1 >> 16) & 0xFFFF) + mbuf->pkt_len; - ol_flags |= ((uc_cc == CPT_COMP_WARN) ? 
- RTE_MBUF_F_RX_SEC_OFFLOAD : - (RTE_MBUF_F_RX_SEC_OFFLOAD | - RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED)); - } else { - if (flag & NIX_RX_OFFLOAD_CHECKSUM_F) - ol_flags |= nix_rx_olflags_get(lookup_mem, w1); - } - } else { - if (flag & NIX_RX_OFFLOAD_CHECKSUM_F) - ol_flags |= nix_rx_olflags_get(lookup_mem, w1); - } + /* Skip rx ol flags extraction for Security packets */ + if ((!(flag & NIX_RX_SEC_REASSEMBLY_F) || !(w1 & BIT(11))) && + flag & NIX_RX_OFFLOAD_CHECKSUM_F) + ol_flags |= nix_rx_olflags_get(lookup_mem, w1); if (flag & NIX_RX_OFFLOAD_VLAN_STRIP_F) { if (rx->vtag0_gone) { @@ -385,10 +772,15 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, if (flag & NIX_RX_OFFLOAD_MARK_UPDATE_F) ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf); - mbuf->ol_flags = ol_flags; - mbuf->pkt_len = len; - mbuf->data_len = len; - *(uint64_t *)(&mbuf->rearm_data) = val; + /* Packet data length and ol flags is already updated for sec */ + if (flag & NIX_RX_SEC_REASSEMBLY_F && w1 & BIT_ULL(11)) { + mbuf->ol_flags |= ol_flags; + } else { + mbuf->ol_flags = ol_flags; + mbuf->pkt_len = len; + mbuf->data_len = len; + *(uint64_t *)(&mbuf->rearm_data) = val; + } if (flag & NIX_RX_MULTI_SEG_F) /* @@ -398,8 +790,6 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, * Hence, flag argument is not required. */ nix_cqe_xtract_mseg(rx, mbuf, val, 0); - else - mbuf->next = NULL; } static inline uint16_t @@ -509,9 +899,11 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, /* Translate meta to mbuf */ if (flags & NIX_RX_OFFLOAD_SECURITY_F) { const uint64_t cq_w1 = *((const uint64_t *)cq + 1); + const uint64_t cq_w5 = *((const uint64_t *)cq + 5); - mbuf = nix_sec_meta_to_mbuf_sc(cq_w1, sa_base, laddr, - &loff, mbuf, data_off); + mbuf = nix_sec_meta_to_mbuf_sc(cq_w1, cq_w5, sa_base, laddr, + &loff, mbuf, data_off, + flags, mbuf_init); } cn10k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, @@ -547,10 +939,11 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, plt_write64((wdata | nb_pkts), rxq->cq_door); /* Free remaining meta buffers if any */ - if (flags & NIX_RX_OFFLOAD_SECURITY_F && loff) { + if (flags & NIX_RX_OFFLOAD_SECURITY_F && loff) nix_sec_flush_meta(laddr, lmt_id + lnum, loff, aura_handle); - plt_io_wmb(); - } + + if (flags & NIX_RX_OFFLOAD_SECURITY_F) + rte_io_wmb(); return nb_pkts; } @@ -579,6 +972,14 @@ nix_qinq_update(const uint64_t w2, uint64_t ol_flags, struct rte_mbuf *mbuf) return ol_flags; } +#define NIX_PUSH_META_TO_FREE(_mbuf, _laddr, _loff_p) \ + do { \ + *(uint64_t *)((_laddr) + (*(_loff_p) << 3)) = (uint64_t)_mbuf; \ + *(_loff_p) = *(_loff_p) + 1; \ + /* Mark meta mbuf as put */ \ + RTE_MEMPOOL_CHECK_COOKIES(_mbuf->pool, (void **)&_mbuf, 1, 0); \ + } while (0) + static __rte_always_inline uint16_t cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, const uint16_t flags, void *lookup_mem, @@ -590,8 +991,8 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, *(uint64_t *)args : rxq->mbuf_initializer; const uint64x2_t data_off = flags & NIX_RX_VWQE_F ? - vdupq_n_u64(0x80ULL) : - vdupq_n_u64(rxq->data_off); + vdupq_n_u64(RTE_PKTMBUF_HEADROOM) : + vdupq_n_u64(rxq->data_off); const uint32_t qmask = flags & NIX_RX_VWQE_F ? 0 : rxq->qmask; const uint64_t wdata = flags & NIX_RX_VWQE_F ? 0 : rxq->wdata; const uintptr_t desc = flags & NIX_RX_VWQE_F ? 
0 : rxq->desc; @@ -622,6 +1023,12 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP); if (flags & NIX_RX_OFFLOAD_TSTAMP_F) tstamp = rxq->tstamp; + + cq0 = desc + CQE_SZ(head); + rte_prefetch0(CQE_PTR_OFF(cq0, 0, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 1, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 2, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 3, 64, flags)); } else { RTE_SET_USED(head); } @@ -674,19 +1081,97 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, cq0 = (uintptr_t)&mbufs[packets]; } - /* Prefetch N desc ahead */ - rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 4, 64, flags)); - rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 5, 64, flags)); - rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 6, 64, flags)); - rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 7, 64, flags)); + if (flags & NIX_RX_VWQE_F) { + if (pkts - packets > 4) { + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, + 4, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, + 5, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, + 6, 0, flags)); + rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, + 7, 0, flags)); + + if (likely(pkts - packets > 8)) { + rte_prefetch1(CQE_PTR_OFF(cq0, + 8, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 9, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 10, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 11, 0, flags)); + if (pkts - packets > 12) { + rte_prefetch1(CQE_PTR_OFF(cq0, + 12, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 13, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 14, 0, flags)); + rte_prefetch1(CQE_PTR_OFF(cq0, + 15, 0, flags)); + } + } + + rte_prefetch0(CQE_PTR_DIFF(cq0, + 4, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 5, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 6, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 7, RTE_PKTMBUF_HEADROOM, flags)); + + if (likely(pkts - packets > 8)) { + rte_prefetch0(CQE_PTR_DIFF(cq0, + 8, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 9, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 10, RTE_PKTMBUF_HEADROOM, flags)); + rte_prefetch0(CQE_PTR_DIFF(cq0, + 11, RTE_PKTMBUF_HEADROOM, flags)); + } + } + } else { + if (flags & NIX_RX_OFFLOAD_SECURITY_F && + pkts - packets > 4) { + /* Fetch cpt parse header */ + void *p0 = + (void *)*CQE_PTR_OFF(cq0, 4, 72, flags); + void *p1 = + (void *)*CQE_PTR_OFF(cq0, 5, 72, flags); + void *p2 = + (void *)*CQE_PTR_OFF(cq0, 6, 72, flags); + void *p3 = + (void *)*CQE_PTR_OFF(cq0, 7, 72, flags); + rte_prefetch0(p0); + rte_prefetch0(p1); + rte_prefetch0(p2); + rte_prefetch0(p3); + } - /* Get NIX_RX_SG_S for size and buffer pointer */ - cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags)); - cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); - cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); - cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + if (pkts - packets > 8) { + if (flags) { + rte_prefetch0(CQE_PTR_OFF(cq0, 8, 0, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 9, 0, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 10, 0, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 11, 0, flags)); + } + rte_prefetch0(CQE_PTR_OFF(cq0, 8, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 9, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 10, 64, flags)); + rte_prefetch0(CQE_PTR_OFF(cq0, 11, 64, flags)); + } + } if (!(flags & NIX_RX_VWQE_F)) { + /* Get NIX_RX_SG_S for size and buffer pointer */ + cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 
64, flags)); + cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags)); + cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags)); + cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags)); + /* Extract mbuf from NIX_RX_SG_S */ mbuf01 = vzip2q_u64(cq0_w8, cq1_w8); mbuf23 = vzip2q_u64(cq2_w8, cq3_w8); @@ -705,38 +1190,62 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, mbuf2 = (struct rte_mbuf *)vgetq_lane_u64(mbuf23, 0); mbuf3 = (struct rte_mbuf *)vgetq_lane_u64(mbuf23, 1); - /* Mask to get packet len from NIX_RX_SG_S */ - const uint8x16_t shuf_msk = { - 0xFF, 0xFF, /* pkt_type set as unknown */ - 0xFF, 0xFF, /* pkt_type set as unknown */ - 0, 1, /* octet 1~0, low 16 bits pkt_len */ - 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ - 0, 1, /* octet 1~0, 16 bits data_len */ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - - /* Form the rx_descriptor_fields1 with pkt_len and data_len */ - f0 = vqtbl1q_u8(cq0_w8, shuf_msk); - f1 = vqtbl1q_u8(cq1_w8, shuf_msk); - f2 = vqtbl1q_u8(cq2_w8, shuf_msk); - f3 = vqtbl1q_u8(cq3_w8, shuf_msk); - - if (flags & NIX_RX_OFFLOAD_SECURITY_F) { - /* Prefetch probable CPT parse header area */ - rte_prefetch_non_temporal(RTE_PTR_ADD(mbuf0, d_off)); - rte_prefetch_non_temporal(RTE_PTR_ADD(mbuf1, d_off)); - rte_prefetch_non_temporal(RTE_PTR_ADD(mbuf2, d_off)); - rte_prefetch_non_temporal(RTE_PTR_ADD(mbuf3, d_off)); + if (!(flags & NIX_RX_VWQE_F)) { + /* Mask to get packet len from NIX_RX_SG_S */ + const uint8x16_t shuf_msk = { + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0, 1, /* octet 1~0, low 16 bits pkt_len */ + 0xFF, 0xFF, /* skip high 16it pkt_len, zero out */ + 0, 1, /* octet 1~0, 16 bits data_len */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + + /* Form the rx_descriptor_fields1 with pkt_len and data_len */ + f0 = vqtbl1q_u8(cq0_w8, shuf_msk); + f1 = vqtbl1q_u8(cq1_w8, shuf_msk); + f2 = vqtbl1q_u8(cq2_w8, shuf_msk); + f3 = vqtbl1q_u8(cq3_w8, shuf_msk); } /* Load CQE word0 and word 1 */ const uint64_t cq0_w0 = *CQE_PTR_OFF(cq0, 0, 0, flags); const uint64_t cq0_w1 = *CQE_PTR_OFF(cq0, 0, 8, flags); + const uint64_t cq0_w2 = *CQE_PTR_OFF(cq0, 0, 16, flags); const uint64_t cq1_w0 = *CQE_PTR_OFF(cq0, 1, 0, flags); const uint64_t cq1_w1 = *CQE_PTR_OFF(cq0, 1, 8, flags); + const uint64_t cq1_w2 = *CQE_PTR_OFF(cq0, 1, 16, flags); const uint64_t cq2_w0 = *CQE_PTR_OFF(cq0, 2, 0, flags); const uint64_t cq2_w1 = *CQE_PTR_OFF(cq0, 2, 8, flags); + const uint64_t cq2_w2 = *CQE_PTR_OFF(cq0, 2, 16, flags); const uint64_t cq3_w0 = *CQE_PTR_OFF(cq0, 3, 0, flags); const uint64_t cq3_w1 = *CQE_PTR_OFF(cq0, 3, 8, flags); + const uint64_t cq3_w2 = *CQE_PTR_OFF(cq0, 3, 16, flags); + + if (flags & NIX_RX_VWQE_F) { + uint16_t psize0, psize1, psize2, psize3; + + psize0 = (cq0_w2 & 0xFFFF) + 1; + psize1 = (cq1_w2 & 0xFFFF) + 1; + psize2 = (cq2_w2 & 0xFFFF) + 1; + psize3 = (cq3_w2 & 0xFFFF) + 1; + + f0 = vdupq_n_u64(0); + f1 = vdupq_n_u64(0); + f2 = vdupq_n_u64(0); + f3 = vdupq_n_u64(0); + + f0 = vsetq_lane_u16(psize0, f0, 2); + f0 = vsetq_lane_u16(psize0, f0, 4); + + f1 = vsetq_lane_u16(psize1, f1, 2); + f1 = vsetq_lane_u16(psize1, f1, 4); + + f2 = vsetq_lane_u16(psize2, f2, 2); + f2 = vsetq_lane_u16(psize2, f2, 4); + + f3 = vsetq_lane_u16(psize3, f3, 2); + f3 = vsetq_lane_u16(psize3, f3, 4); + } if (flags & NIX_RX_OFFLOAD_RSS_F) { /* Fill rss in the rx_descriptor_fields1 */ @@ -782,33 +1291,255 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, /* Translate meta to mbuf */ if (flags & 
NIX_RX_OFFLOAD_SECURITY_F) { + uint64_t cq0_w5 = *CQE_PTR_OFF(cq0, 0, 40, flags); + uint64_t cq1_w5 = *CQE_PTR_OFF(cq0, 1, 40, flags); + uint64_t cq2_w5 = *CQE_PTR_OFF(cq0, 2, 40, flags); + uint64_t cq3_w5 = *CQE_PTR_OFF(cq0, 3, 40, flags); + uintptr_t cpth0 = (uintptr_t)mbuf0 + d_off; + uintptr_t cpth1 = (uintptr_t)mbuf1 + d_off; + uintptr_t cpth2 = (uintptr_t)mbuf2 + d_off; + uintptr_t cpth3 = (uintptr_t)mbuf3 + d_off; + + uint64x2_t inner0, inner1, inner2, inner3; + uint64x2_t wqe01, wqe23, sa01, sa23; + uint16x4_t lens, l2lens, ltypes; + uint8x8_t ucc; + + inner0 = vld1q_u64((const uint64_t *)cpth0); + inner1 = vld1q_u64((const uint64_t *)cpth1); + inner2 = vld1q_u64((const uint64_t *)cpth2); + inner3 = vld1q_u64((const uint64_t *)cpth3); + + /* Extract and reverse wqe pointers */ + wqe01 = vzip2q_u64(inner0, inner1); + wqe23 = vzip2q_u64(inner2, inner3); + wqe01 = vrev64q_u8(wqe01); + wqe23 = vrev64q_u8(wqe23); + /* Adjust wqe pointers to point to mbuf */ + wqe01 = vsubq_u64(wqe01, + vdupq_n_u64(sizeof(struct rte_mbuf))); + wqe23 = vsubq_u64(wqe23, + vdupq_n_u64(sizeof(struct rte_mbuf))); + + /* Extract sa idx from cookie area and add to sa_base */ + sa01 = vzip1q_u64(inner0, inner1); + sa23 = vzip1q_u64(inner2, inner3); + + sa01 = vshrq_n_u64(sa01, 32); + sa23 = vshrq_n_u64(sa23, 32); + sa01 = vshlq_n_u64(sa01, + ROC_NIX_INL_OT_IPSEC_INB_SA_SZ_LOG2); + sa23 = vshlq_n_u64(sa23, + ROC_NIX_INL_OT_IPSEC_INB_SA_SZ_LOG2); + sa01 = vaddq_u64(sa01, vdupq_n_u64(sa_base)); + sa23 = vaddq_u64(sa23, vdupq_n_u64(sa_base)); + + const uint8x16_t tbl = { + /* ROC_IE_OT_UCC_SUCCESS_SA_SOFTEXP_FIRST */ + 0, + /* ROC_IE_OT_UCC_SUCCESS_PKT_IP_BADCSUM */ + RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1, + /* ROC_IE_OT_UCC_SUCCESS_SA_SOFTEXP_AGAIN */ + 0, + /* ROC_IE_OT_UCC_SUCCESS_PKT_L4_GOODCSUM */ + (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1, + /* ROC_IE_OT_UCC_SUCCESS_PKT_L4_BADCSUM */ + (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1, + /* ROC_IE_OT_UCC_SUCCESS_PKT_UDPESP_NZCSUM */ + (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1, + /* ROC_IE_OT_UCC_SUCCESS_PKT_UDP_ZEROCSUM */ + (RTE_MBUF_F_RX_IP_CKSUM_GOOD | + RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1, + /* ROC_IE_OT_UCC_SUCCESS_PKT_IP_GOODCSUM */ + RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1, + /* HW_CCODE -> RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED */ + 1, 0, 1, 1, 1, 1, 0, 1, + }; + + const int8x8_t err_off = { + /* UCC of significance starts from 0xF0 */ + 0xF0, + /* Move HW_CCODE from 0:6 -> 8:14 */ + -8, + 0xF0, + -8, + 0xF0, + -8, + 0xF0, + -8, + }; + + ucc = vdup_n_u8(0); + ucc = vset_lane_u16(*(uint16_t *)(cpth0 + 30), ucc, 0); + ucc = vset_lane_u16(*(uint16_t *)(cpth1 + 30), ucc, 1); + ucc = vset_lane_u16(*(uint16_t *)(cpth2 + 30), ucc, 2); + ucc = vset_lane_u16(*(uint16_t *)(cpth3 + 30), ucc, 3); + ucc = vsub_s8(ucc, err_off); + ucc = vqtbl1_u8(tbl, ucc); + + RTE_BUILD_BUG_ON(NPC_LT_LC_IP != 2); + RTE_BUILD_BUG_ON(NPC_LT_LC_IP_OPT != 3); + RTE_BUILD_BUG_ON(NPC_LT_LC_IP6 != 4); + RTE_BUILD_BUG_ON(NPC_LT_LC_IP6_EXT != 5); + + ltypes = vdup_n_u16(0); + ltypes = vset_lane_u16((cq0_w1 >> 40) & 0x6, ltypes, 0); + ltypes = vset_lane_u16((cq1_w1 >> 40) & 0x6, ltypes, 1); + ltypes = vset_lane_u16((cq2_w1 >> 40) & 0x6, ltypes, 2); + ltypes = vset_lane_u16((cq3_w1 >> 40) & 0x6, ltypes, 3); + + /* Extract and reverse l3 length from IPv4/IPv6 hdr + * that is in same cacheline most probably as cpth. 
+ */ + cpth0 += ((cq0_w5 >> 16) & 0xFF) + + vget_lane_u16(ltypes, 0); + cpth1 += ((cq1_w5 >> 16) & 0xFF) + + vget_lane_u16(ltypes, 1); + cpth2 += ((cq2_w5 >> 16) & 0xFF) + + vget_lane_u16(ltypes, 2); + cpth3 += ((cq3_w5 >> 16) & 0xFF) + + vget_lane_u16(ltypes, 3); + lens = vdup_n_u16(0); + lens = vset_lane_u16(*(uint16_t *)cpth0, lens, 0); + lens = vset_lane_u16(*(uint16_t *)cpth1, lens, 1); + lens = vset_lane_u16(*(uint16_t *)cpth2, lens, 2); + lens = vset_lane_u16(*(uint16_t *)cpth3, lens, 3); + lens = vrev16_u8(lens); + + /* Add l2 length to l3 lengths */ + l2lens = vdup_n_u16(0); + l2lens = vset_lane_u16(((cq0_w5 >> 16) & 0xFF) - + (cq0_w5 & 0xFF), + l2lens, 0); + l2lens = vset_lane_u16(((cq1_w5 >> 16) & 0xFF) - + (cq1_w5 & 0xFF), + l2lens, 1); + l2lens = vset_lane_u16(((cq2_w5 >> 16) & 0xFF) - + (cq2_w5 & 0xFF), + l2lens, 2); + l2lens = vset_lane_u16(((cq3_w5 >> 16) & 0xFF) - + (cq3_w5 & 0xFF), + l2lens, 3); + lens = vadd_u16(lens, l2lens); + + /* L3 header adjust */ + const int8x8_t l3adj = { + 0, 0, 0, 0, 40, 0, 0, 0, + }; + lens = vadd_u16(lens, vtbl1_u8(l3adj, ltypes)); + + /* Initialize rearm data when reassembly is enabled as + * data offset might change. + */ + if (flags & NIX_RX_REAS_F) { + rearm0 = vdupq_n_u64(mbuf_initializer); + rearm1 = vdupq_n_u64(mbuf_initializer); + rearm2 = vdupq_n_u64(mbuf_initializer); + rearm3 = vdupq_n_u64(mbuf_initializer); + } + /* Checksum ol_flags will be cleared if mbuf is meta */ - mbuf0 = nix_sec_meta_to_mbuf(cq0_w1, sa_base, laddr, - &loff, mbuf0, d_off, &f0, - &ol_flags0); - mbuf01 = vsetq_lane_u64((uint64_t)mbuf0, mbuf01, 0); - - mbuf1 = nix_sec_meta_to_mbuf(cq1_w1, sa_base, laddr, - &loff, mbuf1, d_off, &f1, - &ol_flags1); - mbuf01 = vsetq_lane_u64((uint64_t)mbuf1, mbuf01, 1); - - mbuf2 = nix_sec_meta_to_mbuf(cq2_w1, sa_base, laddr, - &loff, mbuf2, d_off, &f2, - &ol_flags2); - mbuf23 = vsetq_lane_u64((uint64_t)mbuf2, mbuf23, 0); - - mbuf3 = nix_sec_meta_to_mbuf(cq3_w1, sa_base, laddr, - &loff, mbuf3, d_off, &f3, - &ol_flags3); - mbuf23 = vsetq_lane_u64((uint64_t)mbuf3, mbuf23, 1); + if (cq0_w1 & BIT(11)) { + uintptr_t wqe = vgetq_lane_u64(wqe01, 0); + uintptr_t sa = vgetq_lane_u64(sa01, 0); + uint16_t len = vget_lane_u16(lens, 0); + + cpth0 = (uintptr_t)mbuf0 + d_off; + /* Free meta to aura */ + NIX_PUSH_META_TO_FREE(mbuf0, laddr, &loff); + mbuf01 = vsetq_lane_u64(wqe, mbuf01, 0); + mbuf0 = (struct rte_mbuf *)wqe; + + /* Update pkt_len and data_len */ + f0 = vsetq_lane_u16(len, f0, 2); + f0 = vsetq_lane_u16(len, f0, 4); + + nix_sec_meta_to_mbuf(cq0_w1, cq0_w5, sa, cpth0, + mbuf0, &f0, &ol_flags0, + flags, &rearm0); + ol_flags0 |= ((uint64_t)vget_lane_u8(ucc, 0)) + << 1; + ol_flags0 |= (RTE_MBUF_F_RX_SEC_OFFLOAD | + (uint64_t)vget_lane_u8(ucc, 1) << 19); + } + + if (cq1_w1 & BIT(11)) { + uintptr_t wqe = vgetq_lane_u64(wqe01, 1); + uintptr_t sa = vgetq_lane_u64(sa01, 1); + uint16_t len = vget_lane_u16(lens, 1); + + cpth1 = (uintptr_t)mbuf1 + d_off; + /* Free meta to aura */ + NIX_PUSH_META_TO_FREE(mbuf1, laddr, &loff); + mbuf01 = vsetq_lane_u64(wqe, mbuf01, 1); + mbuf1 = (struct rte_mbuf *)wqe; + + /* Update pkt_len and data_len */ + f1 = vsetq_lane_u16(len, f1, 2); + f1 = vsetq_lane_u16(len, f1, 4); + + nix_sec_meta_to_mbuf(cq1_w1, cq1_w5, sa, cpth1, + mbuf1, &f1, &ol_flags1, + flags, &rearm1); + ol_flags1 |= ((uint64_t)vget_lane_u8(ucc, 2)) + << 1; + ol_flags1 |= (RTE_MBUF_F_RX_SEC_OFFLOAD | + (uint64_t)vget_lane_u8(ucc, 3) << 19); + } + + if (cq2_w1 & BIT(11)) { + uintptr_t wqe = vgetq_lane_u64(wqe23, 0); + uintptr_t sa = 
vgetq_lane_u64(sa23, 0); + uint16_t len = vget_lane_u16(lens, 2); + + cpth2 = (uintptr_t)mbuf2 + d_off; + /* Free meta to aura */ + NIX_PUSH_META_TO_FREE(mbuf2, laddr, &loff); + mbuf23 = vsetq_lane_u64(wqe, mbuf23, 0); + mbuf2 = (struct rte_mbuf *)wqe; + + /* Update pkt_len and data_len */ + f2 = vsetq_lane_u16(len, f2, 2); + f2 = vsetq_lane_u16(len, f2, 4); + + nix_sec_meta_to_mbuf(cq2_w1, cq2_w5, sa, cpth2, + mbuf2, &f2, &ol_flags2, + flags, &rearm2); + ol_flags2 |= ((uint64_t)vget_lane_u8(ucc, 4)) + << 1; + ol_flags2 |= (RTE_MBUF_F_RX_SEC_OFFLOAD | + (uint64_t)vget_lane_u8(ucc, 5) << 19); + } + + if (cq3_w1 & BIT(11)) { + uintptr_t wqe = vgetq_lane_u64(wqe23, 1); + uintptr_t sa = vgetq_lane_u64(sa23, 1); + uint16_t len = vget_lane_u16(lens, 3); + + cpth3 = (uintptr_t)mbuf3 + d_off; + /* Free meta to aura */ + NIX_PUSH_META_TO_FREE(mbuf3, laddr, &loff); + mbuf23 = vsetq_lane_u64(wqe, mbuf23, 1); + mbuf3 = (struct rte_mbuf *)wqe; + + /* Update pkt_len and data_len */ + f3 = vsetq_lane_u16(len, f3, 2); + f3 = vsetq_lane_u16(len, f3, 4); + + nix_sec_meta_to_mbuf(cq3_w1, cq3_w5, sa, cpth3, + mbuf3, &f3, &ol_flags3, + flags, &rearm3); + ol_flags3 |= ((uint64_t)vget_lane_u8(ucc, 6)) + << 1; + ol_flags3 |= (RTE_MBUF_F_RX_SEC_OFFLOAD | + (uint64_t)vget_lane_u8(ucc, 7) << 19); + } } if (flags & NIX_RX_OFFLOAD_VLAN_STRIP_F) { - uint64_t cq0_w2 = *(uint64_t *)(cq0 + CQE_SZ(0) + 16); - uint64_t cq1_w2 = *(uint64_t *)(cq0 + CQE_SZ(1) + 16); - uint64_t cq2_w2 = *(uint64_t *)(cq0 + CQE_SZ(2) + 16); - uint64_t cq3_w2 = *(uint64_t *)(cq0 + CQE_SZ(3) + 16); ol_flags0 = nix_vlan_update(cq0_w2, ol_flags0, &f0); ol_flags1 = nix_vlan_update(cq1_w2, ol_flags1, &f1); @@ -942,10 +1673,6 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); - /* Store the mbufs to rx_pkts */ - vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); - vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); - if (flags & NIX_RX_MULTI_SEG_F) { /* Multi segment is enable build mseg list for * individual mbufs in scalar mode. 
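/*
 * Illustrative sketch (not part of the patch): a scalar rendering of the
 * per-lane translation done above for decrypted packets. The first two
 * 64-bit words of the CPT parse header hold the SA cookie (already in CPU
 * byte order) and the big-endian pointer to the inner packet's WQE; the
 * inner mbuf sits sizeof(struct rte_mbuf) in front of that WQE, and the
 * inbound SA address is the cookie's SA index scaled onto sa_base. The SA
 * size log2 below is an assumed placeholder for the driver's
 * ROC_NIX_INL_OT_IPSEC_INB_SA_SZ_LOG2.
 */
#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_mbuf.h>

#define EXAMPLE_INB_SA_SZ_LOG2 10 /* placeholder, not the ROC definition */

static inline struct rte_mbuf *
example_meta_to_inner(const uint64_t *cpth, uint64_t sa_base,
		      uintptr_t *inb_sa)
{
	uint64_t w0 = cpth[0];			  /* CPT_PARSE_S cookie word */
	uint64_t wqe = rte_be_to_cpu_64(cpth[1]); /* inner WQE pointer */
	uint32_t sa_idx = w0 >> 32;		  /* SA index from cookie */

	*inb_sa = (uintptr_t)(sa_base +
			      ((uint64_t)sa_idx << EXAMPLE_INB_SA_SZ_LOG2));

	/* Inner mbuf is laid out immediately before its WQE */
	return (struct rte_mbuf *)(uintptr_t)(wqe - sizeof(struct rte_mbuf));
}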
@@ -962,19 +1689,22 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, nix_cqe_xtract_mseg((union nix_rx_parse_u *) (CQE_PTR_OFF(cq0, 3, 8, flags)), mbuf3, mbuf_initializer, flags); - } else { - /* Update that no more segments */ - mbuf0->next = NULL; - mbuf1->next = NULL; - mbuf2->next = NULL; - mbuf3->next = NULL; } - /* Prefetch mbufs */ - roc_prefetch_store_keep(mbuf0); - roc_prefetch_store_keep(mbuf1); - roc_prefetch_store_keep(mbuf2); - roc_prefetch_store_keep(mbuf3); + /* Store the mbufs to rx_pkts */ + vst1q_u64((uint64_t *)&mbufs[packets], mbuf01); + vst1q_u64((uint64_t *)&mbufs[packets + 2], mbuf23); + + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf2->pool, (void **)&mbuf2, 1, 1); + RTE_MEMPOOL_CHECK_COOKIES(mbuf3->pool, (void **)&mbuf3, 1, 1); + + nix_mbuf_validate_next(mbuf0); + nix_mbuf_validate_next(mbuf1); + nix_mbuf_validate_next(mbuf2); + nix_mbuf_validate_next(mbuf3); packets += NIX_DESCS_PER_LOOP; @@ -1261,6 +1991,7 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, R(sec_vlan_ts_mark_cksum_ptype_rss, \ R_SEC_F | RX_VLAN_F | TS_F | MARK_F | CKSUM_F | PTYPE_F | RSS_F) + #define NIX_RX_FASTPATH_MODES \ NIX_RX_FASTPATH_MODES_0_15 \ NIX_RX_FASTPATH_MODES_16_31 \ @@ -1269,7 +2000,7 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, NIX_RX_FASTPATH_MODES_64_79 \ NIX_RX_FASTPATH_MODES_80_95 \ NIX_RX_FASTPATH_MODES_96_111 \ - NIX_RX_FASTPATH_MODES_112_127 + NIX_RX_FASTPATH_MODES_112_127 \ #define R(name, flags) \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_##name( \ @@ -1279,6 +2010,14 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts, uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_reas_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_reas_mseg_##name(\ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_reas_vec_##name( \ + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \ + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_reas_vec_mseg_##name( \ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); NIX_RX_FASTPATH_MODES
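/*
 * Illustrative expansion (not part of the patch): for the
 * sec_vlan_ts_mark_cksum_ptype_rss mode listed above, the four declarations
 * newly emitted by the extended R() macro are the reassembly-enabled twins
 * of the existing scalar/mseg/vector fast paths.
 */
#include <rte_common.h>
#include <rte_mbuf.h>

uint16_t __rte_noinline __rte_hot
cn10k_nix_recv_pkts_reas_sec_vlan_ts_mark_cksum_ptype_rss(
	void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
uint16_t __rte_noinline __rte_hot
cn10k_nix_recv_pkts_reas_mseg_sec_vlan_ts_mark_cksum_ptype_rss(
	void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
uint16_t __rte_noinline __rte_hot
cn10k_nix_recv_pkts_reas_vec_sec_vlan_ts_mark_cksum_ptype_rss(
	void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
uint16_t __rte_noinline __rte_hot
cn10k_nix_recv_pkts_reas_vec_mseg_sec_vlan_ts_mark_cksum_ptype_rss(
	void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);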