mbuf: make rearm data address naturally aligned
[dpdk.git] / drivers / net / i40e / i40e_rxtx_vec_sse.c
index 7c84a41..fdd4a34 100644 (file)
@@ -87,11 +87,8 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
                mb0 = rxep[0].mbuf;
                mb1 = rxep[1].mbuf;
 
-                /* Flush mbuf with pkt template.
+               /* Flush mbuf with pkt template.
                 * Data to be rearmed is 6 bytes long.
-                * Though, RX will overwrite ol_flags that are coming next
-                * anyway. So overwrite whole 8 bytes with one load:
-                * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
                 */
                p0 = (uintptr_t)&mb0->rearm_data;
                *(uint64_t *)p0 = rxq->mbuf_initializer;
@@ -148,6 +145,20 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
        const __m128i rss_vlan_msk = _mm_set_epi32(
                        0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804);
 
+       const __m128i cksum_mask = _mm_set_epi32(
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD,
+                       PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+                       PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+                       PKT_RX_EIP_CKSUM_BAD);
+
        /* map rss and vlan type to rss hash and vlan flag */
        const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
                        0, 0, 0, 0,
@@ -160,14 +171,17 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
                        0, 0, PKT_RX_FDIR, 0);
 
        const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
-                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
-                       PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
-                       PKT_RX_EIP_CKSUM_BAD,
-                       PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
-                       PKT_RX_L4_CKSUM_BAD,
-                       PKT_RX_IP_CKSUM_BAD,
-                       0);
+                       /* shift right 1 bit to make sure it does not exceed 255 */
+                       (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+                        PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
+                        PKT_RX_L4_CKSUM_BAD) >> 1,
+                       (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
+                       (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
+                       PKT_RX_IP_CKSUM_BAD >> 1,
+                       (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1);
 
        vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
        vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]);
@@ -181,6 +195,10 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
 
        l3_l4e = _mm_srli_epi32(vlan1, 22);
        l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e);
+       /* then we shift left 1 bit */
+       l3_l4e = _mm_slli_epi32(l3_l4e, 1);
+       /* we need to mask out the redundant bits */
+       l3_l4e = _mm_and_si128(l3_l4e, cksum_mask);
 
        vlan0 = _mm_or_si128(vlan0, rss);
        vlan0 = _mm_or_si128(vlan0, l3_l4e);
@@ -403,12 +421,6 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                        /* store the resulting 32-bit value */
                        *(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
                        split_packet += RTE_I40E_DESCS_PER_LOOP;
-
-                       /* zero-out next pointers */
-                       rx_pkts[pos]->next = NULL;
-                       rx_pkts[pos + 1]->next = NULL;
-                       rx_pkts[pos + 2]->next = NULL;
-                       rx_pkts[pos + 3]->next = NULL;
                }
 
                /* C.3 calc available number of desc */
@@ -515,8 +527,8 @@ vtx(volatile struct i40e_tx_desc *txdp,
 }
 
 uint16_t
-i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
-                  uint16_t nb_pkts)
+i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
+                         uint16_t nb_pkts)
 {
        struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
        volatile struct i40e_tx_desc *txdp;