X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Ffm10k%2Ffm10k_rxtx_vec.c;h=9ea747e11933b1270b49e08dd28d2cefa1c8b072;hb=3f5ac8f2925bf27c85c90e6fb8d89634c3761029;hp=ffd022a1a636300387771ea8838abe3b894553fd;hpb=fe65e1e1ce61c6d56bb433dc76650a3361593e1f;p=dpdk.git diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c index ffd022a1a6..9ea747e119 100644 --- a/drivers/net/fm10k/fm10k_rxtx_vec.c +++ b/drivers/net/fm10k/fm10k_rxtx_vec.c @@ -44,6 +44,9 @@ #pragma GCC diagnostic ignored "-Wcast-qual" #endif +static void +fm10k_reset_tx_queue(struct fm10k_tx_queue *txq); + /* Handling the offload flags (olflags) field takes computation * time when receiving packets. Therefore we provide a flag to disable * the processing of the olflags field when they are not needed. This @@ -58,11 +61,17 @@ #define L3TYPE_SHIFT (4) /* L4 type shift */ #define L4TYPE_SHIFT (7) +/* HBO flag shift */ +#define HBOFLAG_SHIFT (10) +/* RXE flag shift */ +#define RXEFLAG_SHIFT (13) +/* IPE/L4E flag shift */ +#define L3L4EFLAG_SHIFT (14) static inline void fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { - __m128i ptype0, ptype1, vtag0, vtag1; + __m128i ptype0, ptype1, vtag0, vtag1, eflag0, eflag1, cksumflag; union { uint16_t e[4]; uint64_t dword; @@ -78,12 +87,29 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) 0x0000, 0x0000, 0x0000, 0x0000, 0x000F, 0x000F, 0x000F, 0x000F); + /* mask for HBO and RXE flag flags */ + const __m128i rxe_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0001, 0x0001, 0x0001); + + const __m128i l3l4cksum_flag = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0); + + const __m128i rxe_flag = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0); + /* map rss type to rss hash flag */ const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0); + /* Calculate RSS_hash and Vlan fields */ ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]); ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]); @@ -94,10 +120,27 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) ptype0 = _mm_shuffle_epi8(rss_flags, ptype0); vtag1 = _mm_unpacklo_epi32(vtag0, vtag1); + eflag0 = vtag1; + cksumflag = vtag1; vtag1 = _mm_srli_epi16(vtag1, VP_SHIFT); vtag1 = _mm_and_si128(vtag1, pkttype_msk); vtag1 = _mm_or_si128(ptype0, vtag1); + + /* Process err flags, simply set RECIP_ERR bit if HBO/IXE is set */ + eflag1 = _mm_srli_epi16(eflag0, RXEFLAG_SHIFT); + eflag0 = _mm_srli_epi16(eflag0, HBOFLAG_SHIFT); + eflag0 = _mm_or_si128(eflag0, eflag1); + eflag0 = _mm_and_si128(eflag0, rxe_msk); + eflag0 = _mm_shuffle_epi8(rxe_flag, eflag0); + + vtag1 = _mm_or_si128(eflag0, vtag1); + + /* Process L4/L3 checksum error flags */ + cksumflag = _mm_srli_epi16(cksumflag, L3L4EFLAG_SHIFT); + cksumflag = _mm_shuffle_epi8(l3l4cksum_flag, cksumflag); + vtag1 = _mm_or_si128(cksumflag, vtag1); + vol.dword = _mm_cvtsi128_si64(vtag1); rx_pkts[0]->ol_flags = vol.e[0]; @@ -106,6 +149,9 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) rx_pkts[3]->ol_flags = vol.e[3]; } +/* @note: When this function is changed, make corresponding change to + * fm10k_dev_supported_ptypes_get(). + */ static inline void fm10k_desc_to_pktype_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { @@ -321,6 +367,24 @@ fm10k_rxq_rearm(struct fm10k_rx_queue *rxq) FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id); } +void __attribute__((cold)) +fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq) +{ + const unsigned mask = rxq->nb_desc - 1; + unsigned i; + + if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_desc) + return; + + /* free all mbufs that are valid in the ring */ + for (i = rxq->next_dd; i != rxq->rxrearm_start; i = (i + 1) & mask) + rte_pktmbuf_free_seg(rxq->sw_ring[i]); + rxq->rxrearm_nb = rxq->nb_desc; + + /* set all entries to NULL */ + memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_desc); +} + static inline uint16_t fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, uint8_t *split_packet) @@ -423,10 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, rte_compiler_barrier(); if (split_packet) { - rte_prefetch0(&rx_pkts[pos]->cacheline1); - rte_prefetch0(&rx_pkts[pos + 1]->cacheline1); - rte_prefetch0(&rx_pkts[pos + 2]->cacheline1); - rte_prefetch0(&rx_pkts[pos + 3]->cacheline1); + rte_mbuf_prefetch_part2(rx_pkts[pos]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); } /* D.1 pkt 3,4 convert format from desc to pktmbuf */ @@ -542,8 +606,11 @@ fm10k_reassemble_packets(struct fm10k_rx_queue *rxq, if (!split_flags[buf_idx]) { /* it's the last packet of the set */ +#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE start->hash = end->hash; start->ol_flags = end->ol_flags; + start->packet_type = end->packet_type; +#endif pkts[pkt_idx++] = start; start = end = NULL; } @@ -609,3 +676,203 @@ fm10k_recv_scattered_pkts_vec(void *rx_queue, return i + fm10k_reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, &split_flags[i]); } + +static const struct fm10k_txq_ops vec_txq_ops = { + .reset = fm10k_reset_tx_queue, +}; + +void __attribute__((cold)) +fm10k_txq_vec_setup(struct fm10k_tx_queue *txq) +{ + txq->ops = &vec_txq_ops; +} + +int __attribute__((cold)) +fm10k_tx_vec_condition_check(struct fm10k_tx_queue *txq) +{ + /* Vector TX can't offload any features yet */ + if ((txq->txq_flags & FM10K_SIMPLE_TX_FLAG) != FM10K_SIMPLE_TX_FLAG) + return -1; + + if (txq->tx_ftag_en) + return -1; + + return 0; +} + +static inline void +vtx1(volatile struct fm10k_tx_desc *txdp, + struct rte_mbuf *pkt, uint64_t flags) +{ + __m128i descriptor = _mm_set_epi64x(flags << 56 | + pkt->vlan_tci << 16 | pkt->data_len, + MBUF_DMA_ADDR(pkt)); + _mm_store_si128((__m128i *)txdp, descriptor); +} + +static inline void +vtx(volatile struct fm10k_tx_desc *txdp, + struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) +{ + int i; + + for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) + vtx1(txdp, *pkt, flags); +} + +static inline int __attribute__((always_inline)) +fm10k_tx_free_bufs(struct fm10k_tx_queue *txq) +{ + struct rte_mbuf **txep; + uint8_t flags; + uint32_t n; + uint32_t i; + int nb_free = 0; + struct rte_mbuf *m, *free[RTE_FM10K_TX_MAX_FREE_BUF_SZ]; + + /* check DD bit on threshold descriptor */ + flags = txq->hw_ring[txq->next_dd].flags; + if (!(flags & FM10K_TXD_FLAG_DONE)) + return 0; + + n = txq->rs_thresh; + + /* First buffer to free from S/W ring is at index + * next_dd - (rs_thresh-1) + */ + txep = &txq->sw_ring[txq->next_dd - (n - 1)]; + m = __rte_pktmbuf_prefree_seg(txep[0]); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i]); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) + free[nb_free++] = m; + else { + rte_mempool_put_bulk(free[0]->pool, + (void *)free, nb_free); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + } else { + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i]); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + /* buffers were freed, update counters */ + txq->nb_free = (uint16_t)(txq->nb_free + txq->rs_thresh); + txq->next_dd = (uint16_t)(txq->next_dd + txq->rs_thresh); + if (txq->next_dd >= txq->nb_desc) + txq->next_dd = (uint16_t)(txq->rs_thresh - 1); + + return txq->rs_thresh; +} + +static inline void __attribute__((always_inline)) +tx_backlog_entry(struct rte_mbuf **txep, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i; + + for (i = 0; i < (int)nb_pkts; ++i) + txep[i] = tx_pkts[i]; +} + +uint16_t +fm10k_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct fm10k_tx_queue *txq = (struct fm10k_tx_queue *)tx_queue; + volatile struct fm10k_tx_desc *txdp; + struct rte_mbuf **txep; + uint16_t n, nb_commit, tx_id; + uint64_t flags = FM10K_TXD_FLAG_LAST; + uint64_t rs = FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_LAST; + int i; + + /* cross rx_thresh boundary is not allowed */ + nb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh); + + if (txq->nb_free < txq->free_thresh) + fm10k_tx_free_bufs(txq); + + nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts); + if (unlikely(nb_pkts == 0)) + return 0; + + tx_id = txq->next_free; + txdp = &txq->hw_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + + txq->nb_free = (uint16_t)(txq->nb_free - nb_pkts); + + n = (uint16_t)(txq->nb_desc - tx_id); + if (nb_commit >= n) { + tx_backlog_entry(txep, tx_pkts, n); + + for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) + vtx1(txdp, *tx_pkts, flags); + + vtx1(txdp, *tx_pkts++, rs); + + nb_commit = (uint16_t)(nb_commit - n); + + tx_id = 0; + txq->next_rs = (uint16_t)(txq->rs_thresh - 1); + + /* avoid reach the end of ring */ + txdp = &(txq->hw_ring[tx_id]); + txep = &txq->sw_ring[tx_id]; + } + + tx_backlog_entry(txep, tx_pkts, nb_commit); + + vtx(txdp, tx_pkts, nb_commit, flags); + + tx_id = (uint16_t)(tx_id + nb_commit); + if (tx_id > txq->next_rs) { + txq->hw_ring[txq->next_rs].flags |= FM10K_TXD_FLAG_RS; + txq->next_rs = (uint16_t)(txq->next_rs + txq->rs_thresh); + } + + txq->next_free = tx_id; + + FM10K_PCI_REG_WRITE(txq->tail_ptr, txq->next_free); + + return nb_pkts; +} + +static void __attribute__((cold)) +fm10k_reset_tx_queue(struct fm10k_tx_queue *txq) +{ + static const struct fm10k_tx_desc zeroed_desc = {0}; + struct rte_mbuf **txe = txq->sw_ring; + uint16_t i; + + /* Zero out HW ring memory */ + for (i = 0; i < txq->nb_desc; i++) + txq->hw_ring[i] = zeroed_desc; + + /* Initialize SW ring entries */ + for (i = 0; i < txq->nb_desc; i++) + txe[i] = NULL; + + txq->next_dd = (uint16_t)(txq->rs_thresh - 1); + txq->next_rs = (uint16_t)(txq->rs_thresh - 1); + + txq->next_free = 0; + txq->nb_used = 0; + /* Always allow 1 descriptor to be un-allocated to avoid + * a H/W race condition + */ + txq->nb_free = (uint16_t)(txq->nb_desc - 1); + FM10K_PCI_REG_WRITE(txq->tail_ptr, 0); +}