diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
index 9869b8b2c9..7ac6b612bf 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c
@@ -45,34 +45,56 @@
 #endif
 
 static inline void
-ixgbe_rxq_rearm(struct igb_rx_queue *rxq)
+ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
 {
 	int i;
 	uint16_t rx_id;
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct igb_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
 	struct rte_mbuf *mb0, *mb1;
 	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
 			RTE_PKTMBUF_HEADROOM);
+	__m128i dma_addr0, dma_addr1;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
 
 	/* Pull 'n' more MBUFs into the software ring */
 	if (rte_mempool_get_bulk(rxq->mb_pool,
-				 (void *)rxep, RTE_IXGBE_RXQ_REARM_THRESH) < 0)
+				 (void *)rxep,
+				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
+		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
+		    rxq->nb_rx_desc) {
+			dma_addr0 = _mm_setzero_si128();
+			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
+				rxep[i].mbuf = &rxq->fake_mbuf;
+				_mm_store_si128((__m128i *)&rxdp[i].read,
+						dma_addr0);
+			}
+		}
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_IXGBE_RXQ_REARM_THRESH;
 		return;
-
-	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	}
 
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
-		__m128i dma_addr0, dma_addr1;
 		__m128i vaddr0, vaddr1;
+		uintptr_t p0, p1;
 
 		mb0 = rxep[0].mbuf;
 		mb1 = rxep[1].mbuf;
 
-		/* flush mbuf with pkt template */
-		mb0->rearm_data[0] = rxq->mbuf_initializer;
-		mb1->rearm_data[0] = rxq->mbuf_initializer;
+		/*
+		 * Flush mbuf with pkt template.
+		 * Data to be rearmed is 6 bytes long.
+		 * However, RX will overwrite ol_flags, which comes next,
+		 * anyway, so overwrite the whole 8 bytes with one store:
+		 * 6 bytes of rearm_data plus the first 2 bytes of ol_flags.
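+		 * This single store relies on ol_flags starting right
+		 * after rearm_data in struct rte_mbuf.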
+		 */
+		p0 = (uintptr_t)&mb0->rearm_data;
+		*(uint64_t *)p0 = rxq->mbuf_initializer;
+		p1 = (uintptr_t)&mb1->rearm_data;
+		*(uint64_t *)p1 = rxq->mbuf_initializer;
 
 		/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
 		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
@@ -164,13 +186,12 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
  * numbers of DD bit
  * - don't support ol_flags for rss and csum err
  */
-uint16_t
-ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
-		uint16_t nb_pkts)
+static inline uint16_t
+_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts, uint8_t *split_packet)
 {
 	volatile union ixgbe_adv_rx_desc *rxdp;
-	struct igb_rx_queue *rxq = rx_queue;
-	struct igb_rx_entry *sw_ring;
+	struct ixgbe_rx_entry *sw_ring;
 	uint16_t nb_pkts_recd;
 	int pos;
 	uint64_t var;
@@ -182,7 +203,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			-rxq->crc_len, /* sub crc on data_len */
 			0            /* ignore pkt_type field */
 		);
-	__m128i dd_check;
+	__m128i dd_check, eop_check;
 
 	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
 		return 0;
@@ -207,6 +228,9 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	/* 4 packets DD mask */
 	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);
 
+	/* 4 packets EOP mask */
+	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);
+
 	/* mask to shuffle from desc. to mbuf */
 	shuf_msk = _mm_set_epi8(
 		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
@@ -218,7 +242,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		0xFF, 0xFF  /* skip pkt_type field */
 		);
 
-
 	/* Cache is empty -> need to scan the buffer rings, but first move
 	 * the next 'n' mbufs into the cache */
 	sw_ring = &rxq->sw_ring[rxq->rx_tail];
@@ -227,6 +250,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	 * A. load 4 packet in one loop
 	 * B. copy 4 mbuf point from swring to rx_pkts
 	 * C. calc the number of DD bits among the 4 packets
+	 * [C*. extract the end-of-packet bit, if requested]
 	 * D. fill info. from desc to mbuf
 	 */
 	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
@@ -237,6 +261,13 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
 		__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */
 
+		if (split_packet) {
+			rte_prefetch0(&rx_pkts[pos]->cacheline1);
+			rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
+			rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
+			rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+		}
+
 		/* B.1 load 1 mbuf point */
 		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
 
@@ -295,7 +326,34 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
 
-		/* C.3 calc avaialbe number of desc */
+		/* C* extract and record EOP bit */
+		if (split_packet) {
+			__m128i eop_shuf_mask = _mm_set_epi8(
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0xFF, 0xFF, 0xFF, 0xFF,
+					0x04, 0x0C, 0x00, 0x08
+					);
+
+			/* and with mask to extract bits, flipping 1-0 */
+			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
+			/* the staterr values are not in order, as the count
+			 * of dd bits doesn't care. However, for end of
+			 * packet tracking, we do care, so shuffle. This also
+			 * compresses the 32-bit values to 8-bit */
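+			/* the unpack sequence earlier in this loop (outside
+			 * this hunk) leaves the staterr words as descriptors
+			 * 1,3,0,2; mask bytes 0x08,0x00,0x0C,0x04 gather the
+			 * low byte of each back into 0,1,2,3 packet order */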
+			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
+			/* store the resulting 32-bit value */
+			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
+			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
+
+			/* zero-out next pointers */
+			rx_pkts[pos]->next = NULL;
+			rx_pkts[pos + 1]->next = NULL;
+			rx_pkts[pos + 2]->next = NULL;
+			rx_pkts[pos + 3]->next = NULL;
+		}
+
+		/* C.3 calc available number of desc */
 		staterr = _mm_and_si128(staterr, dd_check);
 		staterr = _mm_packs_epi32(staterr, zero);
 
@@ -319,6 +377,126 @@
 	return nb_pkts_recd;
 }
 
+
+/*
+ * vPMD receive routine, only accepts (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
+ * in one loop
+ *
+ * Notice:
+ * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
+ * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
+ *   numbers of DD bit
+ * - don't support ol_flags for rss and csum err
+ */
+uint16_t
+ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
+}
+
+static inline uint16_t
+reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
+		uint16_t nb_bufs, uint8_t *split_flags)
+{
+	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /* finished pkts */
+	struct rte_mbuf *start = rxq->pkt_first_seg;
+	struct rte_mbuf *end = rxq->pkt_last_seg;
+	unsigned pkt_idx, buf_idx;
+
+	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
+		if (end != NULL) {
+			/* processing a split packet */
+			end->next = rx_bufs[buf_idx];
+			rx_bufs[buf_idx]->data_len += rxq->crc_len;
+
+			start->nb_segs++;
+			start->pkt_len += rx_bufs[buf_idx]->data_len;
+			end = end->next;
+
+			if (!split_flags[buf_idx]) {
+				/* it's the last packet of the set */
+				start->hash = end->hash;
+				start->ol_flags = end->ol_flags;
+				/* we need to strip crc for the whole packet */
+				start->pkt_len -= rxq->crc_len;
+				if (end->data_len > rxq->crc_len)
+					end->data_len -= rxq->crc_len;
+				else {
+					/* free up last mbuf */
+					struct rte_mbuf *secondlast = start;
+
+					while (secondlast->next != end)
+						secondlast = secondlast->next;
+					secondlast->data_len -= (rxq->crc_len -
+							end->data_len);
+					secondlast->next = NULL;
+					rte_pktmbuf_free_seg(end);
+					end = secondlast;
+				}
+				pkts[pkt_idx++] = start;
+				start = end = NULL;
+			}
+		} else {
+			/* not processing a split packet */
+			if (!split_flags[buf_idx]) {
+				/* not a split packet, save and skip */
+				pkts[pkt_idx++] = rx_bufs[buf_idx];
+				continue;
+			}
+			end = start = rx_bufs[buf_idx];
+			rx_bufs[buf_idx]->data_len += rxq->crc_len;
+			rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
+		}
+	}
+
+	/* save the partial packet for next time */
+	rxq->pkt_first_seg = start;
+	rxq->pkt_last_seg = end;
+	memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
+	return pkt_idx;
+}
+
+/*
+ * vPMD receive routine that reassembles scattered packets
+ *
+ * Notice:
+ * - don't support ol_flags for rss and csum err
+ * - only accepts (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
+ */
+uint16_t
+ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct ixgbe_rx_queue *rxq = rx_queue;
+	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
+
+	/* get some new buffers */
+	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
+			split_flags);
+	if (nb_bufs == 0)
+		return 0;
+
+	/* happy day case, full burst + no packets to be joined */
+	const uint64_t *split_fl64 = (uint64_t *)split_flags;
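+	/* four 64-bit loads test all RTE_IXGBE_VPMD_RX_BURST (32)
+	 * split_flags bytes at once */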
+	if (rxq->pkt_first_seg == NULL &&
+			split_fl64[0] == 0 && split_fl64[1] == 0 &&
+			split_fl64[2] == 0 && split_fl64[3] == 0)
+		return nb_bufs;
+
+	/* reassemble any packets that need reassembly */
+	unsigned i = 0;
+	if (rxq->pkt_first_seg == NULL) {
+		/* find the first split flag, and only reassemble from there */
+		while (i < nb_bufs && !split_flags[i])
+			i++;
+		if (i == nb_bufs)
+			return nb_bufs;
+	}
+	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
+		&split_flags[i]);
+}
+
 static inline void
 vtx1(volatile union ixgbe_adv_tx_desc *txdp,
 		struct rte_mbuf *pkt, uint64_t flags)
@@ -339,9 +517,9 @@ vtx(volatile union ixgbe_adv_tx_desc *txdp,
 }
 
 static inline int __attribute__((always_inline))
-ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
+ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
 {
-	struct igb_tx_entry_v *txep;
+	struct ixgbe_tx_entry_v *txep;
 	uint32_t status;
 	uint32_t n;
 	uint32_t i;
@@ -359,22 +537,14 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 	 * first buffer to free from S/W ring is at index
 	 * tx_next_dd - (tx_rs_thresh-1)
 	 */
-	txep = &((struct igb_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -
+	txep = &((struct ixgbe_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -
 			(n - 1)];
-#ifdef RTE_MBUF_REFCNT
 	m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
-#else
-	m = txep[0].mbuf;
-#endif
 	if (likely(m != NULL)) {
 		free[0] = m;
 		nb_free = 1;
 		for (i = 1; i < n; i++) {
-#ifdef RTE_MBUF_REFCNT
 			m = __rte_pktmbuf_prefree_seg(txep[i].mbuf);
-#else
-			m = txep[i]->mbuf;
-#endif
 			if (likely(m != NULL)) {
 				if (likely(m->pool == free[0]->pool))
 					free[nb_free++] = m;
@@ -405,7 +575,7 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
 }
 
 static inline void __attribute__((always_inline))
-tx_backlog_entry(struct igb_tx_entry_v *txep,
+tx_backlog_entry(struct ixgbe_tx_entry_v *txep,
 		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	int i;
@@ -417,9 +587,9 @@ uint16_t
 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 		       uint16_t nb_pkts)
 {
-	struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
+	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
 	volatile union ixgbe_adv_tx_desc *txdp;
-	struct igb_tx_entry_v *txep;
+	struct ixgbe_tx_entry_v *txep;
 	uint16_t n, nb_commit, tx_id;
 	uint64_t flags = DCMD_DTYP_FLAGS;
 	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
@@ -437,7 +607,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	tx_id = txq->tx_tail;
 	txdp = &txq->tx_ring[tx_id];
-	txep = &((struct igb_tx_entry_v *)txq->sw_ring)[tx_id];
+	txep = &((struct ixgbe_tx_entry_v *)txq->sw_ring)[tx_id];
 
 	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
@@ -458,7 +628,7 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		/* avoid reach the end of ring */
 		txdp = &(txq->tx_ring[tx_id]);
-		txep = &(((struct igb_tx_entry_v *)txq->sw_ring)[tx_id]);
+		txep = &(((struct ixgbe_tx_entry_v *)txq->sw_ring)[tx_id]);
 	}
 
 	tx_backlog_entry(txep, tx_pkts, nb_commit);
@@ -481,10 +651,10 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 static void
-ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
+ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
 {
 	unsigned i;
-	struct igb_tx_entry_v *txe;
+	struct ixgbe_tx_entry_v *txe;
 	uint16_t nb_free, max_desc;
 
 	if (txq->sw_ring != NULL) {
@@ -494,36 +664,36 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
 		for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
 		     nb_free < max_desc && i != txq->tx_tail;
 		     i = (i + 1) & max_desc) {
-			txe = (struct igb_tx_entry_v *)&txq->sw_ring[i];
+			txe = (struct ixgbe_tx_entry_v *)&txq->sw_ring[i];
 			if (txe->mbuf != NULL)
 				rte_pktmbuf_free_seg(txe->mbuf);
 		}
 		/* reset tx_entry */
 		for (i = 0; i < txq->nb_tx_desc; i++) {
-			txe = (struct igb_tx_entry_v *)&txq->sw_ring[i];
+			txe = (struct ixgbe_tx_entry_v *)&txq->sw_ring[i];
 			txe->mbuf = NULL;
 		}
 	}
 }
 
 static void
-ixgbe_tx_free_swring(struct igb_tx_queue *txq)
+ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
 {
 	if (txq == NULL)
 		return;
 
 	if (txq->sw_ring != NULL) {
-		rte_free((struct igb_rx_entry *)txq->sw_ring - 1);
+		rte_free((struct ixgbe_rx_entry *)txq->sw_ring - 1);
 		txq->sw_ring = NULL;
 	}
 }
 
 static void
-ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
+ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
 {
 	static const union ixgbe_adv_tx_desc zeroed_desc = { .read = {
 			.buffer_addr = 0} };
-	struct igb_tx_entry_v *txe = (struct igb_tx_entry_v *)txq->sw_ring;
+	struct ixgbe_tx_entry_v *txe = (struct ixgbe_tx_entry_v *)txq->sw_ring;
 	uint16_t i;
 
 	/* Zero out HW ring memory */
@@ -553,43 +723,44 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
 			IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
 }
 
-static struct ixgbe_txq_ops vec_txq_ops = {
+static const struct ixgbe_txq_ops vec_txq_ops = {
 	.release_mbufs = ixgbe_tx_queue_release_mbufs,
 	.free_swring = ixgbe_tx_free_swring,
 	.reset = ixgbe_reset_tx_queue,
 };
 
 int
-ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq)
+ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
 {
-	static struct rte_mbuf mb_def = {
-		.nb_segs = 1,
-		.data_off = RTE_PKTMBUF_HEADROOM,
-#ifdef RTE_MBUF_REFCNT
-		.refcnt = 1,
-#endif
-	};
+	uintptr_t p;
+	struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
 
-	mb_def.buf_len = rxq->mb_pool->elt_size - sizeof(struct rte_mbuf);
+	mb_def.nb_segs = 1;
+	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
 	mb_def.port = rxq->port_id;
-	rxq->mbuf_initializer = *((uint64_t *)&mb_def.rearm_data);
+	rte_mbuf_refcnt_set(&mb_def, 1);
+
+	/* prevent compiler reordering: rearm_data covers previous fields */
+	rte_compiler_barrier();
+	p = (uintptr_t)&mb_def.rearm_data;
+	rxq->mbuf_initializer = *(uint64_t *)p;
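+	/* ixgbe_rxq_rearm() will stamp this 8-byte template onto each
+	 * newly allocated mbuf */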
 	return 0;
 }
 
-int ixgbe_txq_vec_setup(struct igb_tx_queue *txq)
+int ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
 {
 	if (txq->sw_ring == NULL)
 		return -1;
 
 	/* leave the first one for overflow */
-	txq->sw_ring = (struct igb_tx_entry *)
-		((struct igb_tx_entry_v *)txq->sw_ring + 1);
+	txq->sw_ring = (struct ixgbe_tx_entry *)
+		((struct ixgbe_tx_entry_v *)txq->sw_ring + 1);
 	txq->ops = &vec_txq_ops;
 
 	return 0;
 }
 
-int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev)
+int ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
 {
 #ifndef RTE_LIBRTE_IEEE1588
 	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;