RTE_PKTMBUF_HEADROOM);
__m128i dma_addr0, dma_addr1;
+ const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);
+
rxdp = rxq->rx_ring + rxq->rxrearm_start;
/* Pull 'n' more MBUFs into the software ring */
dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);
+ /* set Header Buffer Address to zero */
+ dma_addr0 = _mm_and_si128(dma_addr0, hba_msk);
+ dma_addr1 = _mm_and_si128(dma_addr1, hba_msk);
+
/* flush desc with pa dma_addr */
_mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
_mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
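Note on the rearm hunk above: the ixgbe advanced RX descriptor's read format holds pkt_addr in the low quadword and hdr_addr in the high quadword, and hba_msk = _mm_set_epi64x(0, UINT64_MAX) keeps only the low quadword, so each 128-bit store now programs the packet buffer DMA address and a zero header buffer address. A minimal scalar sketch of the same effect, reusing the surrounding variable names:

	/* scalar equivalent of one masked 128-bit descriptor write (illustrative) */
	rxdp->read.pkt_addr = dma_addr;   /* mbuf buf physaddr + RTE_PKTMBUF_HEADROOM */
	rxdp->read.hdr_addr = 0;          /* Header Buffer Address cleared            */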
*/
#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
-#ifdef RTE_NEXT_ABI
-#define OLFLAGS_MASK_V (((uint64_t)PKT_RX_VLAN_PKT << 48) | \
- ((uint64_t)PKT_RX_VLAN_PKT << 32) | \
- ((uint64_t)PKT_RX_VLAN_PKT << 16) | \
- ((uint64_t)PKT_RX_VLAN_PKT))
-#else
-#define OLFLAGS_MASK ((uint16_t)(PKT_RX_VLAN_PKT | PKT_RX_IPV4_HDR |\
- PKT_RX_IPV4_HDR_EXT | PKT_RX_IPV6_HDR |\
- PKT_RX_IPV6_HDR_EXT))
-#define OLFLAGS_MASK_V (((uint64_t)OLFLAGS_MASK << 48) | \
- ((uint64_t)OLFLAGS_MASK << 32) | \
- ((uint64_t)OLFLAGS_MASK << 16) | \
- ((uint64_t)OLFLAGS_MASK))
+#ifndef RTE_NEXT_ABI
+#define OLFLAGS_MASK ((uint16_t)(PKT_RX_VLAN_PKT | PKT_RX_IPV4_HDR |\
+ PKT_RX_IPV4_HDR_EXT | PKT_RX_IPV6_HDR |\
+ PKT_RX_IPV6_HDR_EXT))
#define PTYPE_SHIFT (1)
#endif /* RTE_NEXT_ABI */
desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
{
#ifdef RTE_NEXT_ABI
- __m128i vtag0, vtag1;
+ __m128i ptype0, ptype1, vtag0, vtag1;
union {
uint16_t e[4];
uint64_t dword;
} vol;
+ /* pkt type + vlan olflags mask */
+ const __m128i pkttype_msk = _mm_set_epi16(
+ 0x0000, 0x0000, 0x0000, 0x0000,
+ PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
+
+ /* mask everything except rss type */
+ const __m128i rsstype_msk = _mm_set_epi16(
+ 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x000F, 0x000F, 0x000F, 0x000F);
+
+ /* map rss type to rss hash flag */
+ const __m128i rss_flags = _mm_set_epi8(PKT_RX_FDIR, 0, 0, 0,
+ 0, 0, 0, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+
+ ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
+ ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);
+
+ ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
+ ptype0 = _mm_and_si128(ptype0, rsstype_msk);
+ ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
+
vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
- vol.dword = _mm_cvtsi128_si64(vtag1) & OLFLAGS_MASK_V;
+ vtag1 = _mm_and_si128(vtag1, pkttype_msk);
+
+ vtag1 = _mm_or_si128(ptype0, vtag1);
+ vol.dword = _mm_cvtsi128_si64(vtag1);
#else
__m128i ptype0, ptype1, vtag0, vtag1;
union {
uint16_t e[4];
uint64_t dword;
} vol;
+ /* pkt type + vlan olflags mask */
+ const __m128i pkttype_msk = _mm_set_epi16(
+ 0x0000, 0x0000, 0x0000, 0x0000,
+ OLFLAGS_MASK, OLFLAGS_MASK, OLFLAGS_MASK, OLFLAGS_MASK);
+
+ /* mask everything except rss type */
+ const __m128i rsstype_msk = _mm_set_epi16(
+ 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x000F, 0x000F, 0x000F, 0x000F);
+
+ /* rss type to PKT_RX_RSS_HASH translation */
+ const __m128i rss_flags = _mm_set_epi8(PKT_RX_FDIR, 0, 0, 0,
+ 0, 0, 0, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);
+
ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
ptype1 = _mm_unpacklo_epi32(ptype0, ptype1);
vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
+ ptype0 = _mm_and_si128(ptype1, rsstype_msk);
+ ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);
+
ptype1 = _mm_slli_epi16(ptype1, PTYPE_SHIFT);
vtag1 = _mm_srli_epi16(vtag1, VTAG_SHIFT);
ptype1 = _mm_or_si128(ptype1, vtag1);
- vol.dword = _mm_cvtsi128_si64(ptype1) & OLFLAGS_MASK_V;
+ ptype1 = _mm_and_si128(ptype1, pkttype_msk);
+
+ ptype0 = _mm_or_si128(ptype0, ptype1);
+
+ vol.dword = _mm_cvtsi128_si64(ptype0);
#endif /* RTE_NEXT_ABI */
rx_pkts[0]->ol_flags = vol.e[0];
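A short note on the new flag computation above: the low four bits of each descriptor's lower dword encode the RSS/FDIR type, and _mm_shuffle_epi8 uses those masked bytes as indices into the 16-entry rss_flags table, yielding PKT_RX_RSS_HASH for hashed types, PKT_RX_FDIR for a flow-director match, and 0 otherwise. A scalar sketch of the same lookup (rss_type_to_olflag is an illustrative helper name, not from the patch; PKT_RX_* come from rte_mbuf.h):

static inline uint64_t
rss_type_to_olflag(uint32_t lo_dword)
{
	/* same contents as the rss_flags vector above, in array order */
	static const uint8_t rss2flag[16] = {
		0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
		0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
		PKT_RX_RSS_HASH, 0, 0, 0,
		0, 0, 0, PKT_RX_FDIR,
	};

	return rss2flag[lo_dword & 0x0F];   /* RSS/FDIR type lives in bits 0-3 */
}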
for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
pos += RTE_IXGBE_DESCS_PER_LOOP,
rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
+#ifdef RTE_NEXT_ABI
+ __m128i descs0[RTE_IXGBE_DESCS_PER_LOOP];
+#endif /* RTE_NEXT_ABI */
__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
/* B.1 load 1 mbuf point */
mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);
+#ifdef RTE_NEXT_ABI
/* Read desc statuses backwards to avoid race condition */
/* A.1 load 4 pkts desc */
- descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
/* B.2 copy 2 mbuf point into rx_pkts */
_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
/* B.1 load 1 mbuf point */
mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
- descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+ descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
/* B.1 load 2 mbuf point */
- descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
- descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+ descs0[0] = _mm_loadu_si128((__m128i *)(rxdp));
/* B.2 copy 2 mbuf point into rx_pkts */
_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
-#ifdef RTE_NEXT_ABI
/* A* mask out 0~3 bits RSS type */
- descs[3] = _mm_and_si128(descs[3], desc_mask);
- descs[2] = _mm_and_si128(descs[2], desc_mask);
+ descs[3] = _mm_and_si128(descs0[3], desc_mask);
+ descs[2] = _mm_and_si128(descs0[2], desc_mask);
/* A* mask out 0~3 bits RSS type */
- descs[1] = _mm_and_si128(descs[1], desc_mask);
- descs[0] = _mm_and_si128(descs[0], desc_mask);
+ descs[1] = _mm_and_si128(descs0[1], desc_mask);
+ descs[0] = _mm_and_si128(descs0[0], desc_mask);
+#else
+ /* Read desc statuses backwards to avoid race condition */
+ /* A.1 load 4 pkts desc */
+ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));
+
+ /* B.2 copy 2 mbuf point into rx_pkts */
+ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);
+
+ /* B.1 load 1 mbuf point */
+ mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]);
+
+ descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));
+ /* B.1 load 2 mbuf point */
+ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));
+ descs[0] = _mm_loadu_si128((__m128i *)(rxdp));
+
+ /* B.2 copy 2 mbuf point into rx_pkts */
+ _mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2);
#endif /* RTE_NEXT_ABI */
/* avoid compiler reorder optimization */
#ifdef RTE_NEXT_ABI
/* set ol_flags with vlan packet type */
+ desc_to_olflags_v(descs0, &rx_pkts[pos]);
#else
/* set ol_flags with packet type and vlan tag */
-#endif /* RTE_NEXT_ABI */
desc_to_olflags_v(descs, &rx_pkts[pos]);
+#endif /* RTE_NEXT_ABI */
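Why the RTE_NEXT_ABI path keeps two copies of each descriptor: desc_to_olflags_v() needs the raw RSS-type bits to derive PKT_RX_RSS_HASH/PKT_RX_FDIR, while the packet-type handling later in the loop must not see them, so descs0[] holds the raw loads and descs[] the copies with the RSS type masked off. In scalar terms, assuming the usual ixgbe write-back layout with the RSS type in bits 0-3 of the lower dword:

	uint32_t lo         = rxdp->wb.lower.lo_dword.data;
	uint32_t rss_type   = lo & 0x0F;    /* kept via descs0[] for ol_flags        */
	uint32_t ptype_bits = lo & ~0x0F;   /* kept via descs[] for packet-type work */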
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
else {
/* free up last mbuf */
struct rte_mbuf *secondlast = start;
+
+ start->nb_segs--;
while (secondlast->next != end)
secondlast = secondlast->next;
secondlast->data_len -= (rxq->crc_len -
return 0;
/* happy day case, full burst + no packets to be joined */
- const uint32_t *split_fl32 = (uint32_t *)split_flags;
+ const uint64_t *split_fl64 = (uint64_t *)split_flags;
if (rxq->pkt_first_seg == NULL &&
- split_fl32[0] == 0 && split_fl32[1] == 0 &&
- split_fl32[2] == 0 && split_fl32[3] == 0)
+ split_fl64[0] == 0 && split_fl64[1] == 0 &&
+ split_fl64[2] == 0 && split_fl64[3] == 0)
return nb_bufs;
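The split_fl32 to split_fl64 change widens the "happy day" check to the whole burst: split_flags carries one byte per received packet, and with the 32-packet vector burst four 64-bit loads cover all of it, while the previous four 32-bit loads only inspected the first half, so split packets in positions 16-31 could be returned without reassembly. The arithmetic, assuming RTE_IXGBE_VPMD_RX_BURST is 32 as used by this driver:

	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST];   /* 32 one-byte flags              */
	/* old: 4 * sizeof(uint32_t) = 16 bytes checked -> packets 0-15 only              */
	/* new: 4 * sizeof(uint64_t) = 32 bytes checked -> every packet in the burst      */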
/* reassemble any packets that need reassembly*/
* first buffer to free from S/W ring is at index
* tx_next_dd - (tx_rs_thresh-1)
*/
- txep = &((struct ixgbe_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -
- (n - 1)];
+ txep = &txq->sw_ring_v[txq->tx_next_dd - (n - 1)];
m = __rte_pktmbuf_prefree_seg(txep[0].mbuf);
if (likely(m != NULL)) {
free[0] = m;
tx_id = txq->tx_tail;
txdp = &txq->tx_ring[tx_id];
- txep = &((struct ixgbe_tx_entry_v *)txq->sw_ring)[tx_id];
+ txep = &txq->sw_ring_v[tx_id];
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
/* avoid reach the end of ring */
txdp = &(txq->tx_ring[tx_id]);
- txep = &(((struct ixgbe_tx_entry_v *)txq->sw_ring)[tx_id]);
+ txep = &txq->sw_ring_v[tx_id];
}
tx_backlog_entry(txep, tx_pkts, nb_commit);
for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1);
i != txq->tx_tail;
i = (i + 1) & max_desc) {
- txe = &((struct ixgbe_tx_entry_v *)txq->sw_ring)[i];
+ txe = &txq->sw_ring_v[i];
rte_pktmbuf_free_seg(txe->mbuf);
}
txq->nb_tx_free = max_desc;
/* reset tx_entry */
for (i = 0; i < txq->nb_tx_desc; i++) {
- txe = (struct ixgbe_tx_entry_v *)&txq->sw_ring[i];
+ txe = &txq->sw_ring_v[i];
txe->mbuf = NULL;
}
}
return;
if (txq->sw_ring != NULL) {
- rte_free((struct ixgbe_rx_entry *)txq->sw_ring - 1);
- txq->sw_ring = NULL;
+ rte_free(txq->sw_ring_v - 1);
+ txq->sw_ring_v = NULL;
}
}
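The sw_ring_v accesses in the Tx paths above, and the setup change further below, read naturally if struct ixgbe_tx_queue exposes the same pointer under both entry types; a sketch of that assumed declaration (not quoted from the header):

	union {
		struct ixgbe_tx_entry   *sw_ring;    /* entries as seen by the scalar PMD */
		struct ixgbe_tx_entry_v *sw_ring_v;  /* entries as seen by the vector PMD */
	};

With that aliasing, the NULL test on sw_ring in ixgbe_tx_free_swring() still covers the vector case, and rte_free(txq->sw_ring_v - 1) undoes the one-entry offset that ixgbe_txq_vec_setup() applies under "leave the first one for overflow".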
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
- struct ixgbe_tx_entry_v *txe = (struct ixgbe_tx_entry_v *)txq->sw_ring;
+ struct ixgbe_tx_entry_v *txe = txq->sw_ring_v;
uint16_t i;
/* Zero out HW ring memory */
int __attribute__((cold))
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
- if (txq->sw_ring == NULL)
+ if (txq->sw_ring_v == NULL)
return -1;
/* leave the first one for overflow */
- txq->sw_ring = (struct ixgbe_tx_entry *)
- ((struct ixgbe_tx_entry_v *)txq->sw_ring + 1);
+ txq->sw_ring_v = txq->sw_ring_v + 1;
txq->ops = &vec_txq_ops;
return 0;