-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * Copyright(c) 2016, Linaro Limited
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation.
+ * Copyright(c) 2016-2018, Linaro Limited.
*/
#include <stdint.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
#include <rte_malloc.h>
#include "base/i40e_prototype.h"
uint64x2_t dma_addr0, dma_addr1;
uint64x2_t zero = vdupq_n_u64(0);
uint64_t paddr;
- uint8x8_t p;
rxdp = rxq->rx_ring + rxq->rxrearm_start;
return;
}
- p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
-
/* Initialize the mbufs in vector, process 2 mbufs in one loop */
for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
mb0 = rxep[0].mbuf;
mb1 = rxep[1].mbuf;
- /* Flush mbuf with pkt template.
- * Data to be rearmed is 6 bytes long.
- * Though, RX will overwrite ol_flags that are coming next
- * anyway. So overwrite whole 8 bytes with one load:
- * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
- */
- vst1_u8((uint8_t *)&mb0->rearm_data, p);
- paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr0 = vdupq_n_u64(paddr);
/* flush desc with pa dma_addr */
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
- vst1_u8((uint8_t *)&mb1->rearm_data, p);
- paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr1 = vdupq_n_u64(paddr);
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
}
I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
-/* Handling the offload flags (olflags) field takes computation
- * time when receiving packets. Therefore we provide a flag to disable
- * the processing of the olflags field when they are not needed. This
- * gives improved performance, at the cost of losing the offload info
- * in the received packet
- */
-#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE
-
static inline void
-desc_to_olflags_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
+desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
+ struct rte_mbuf **rx_pkts)
{
uint32x4_t vlan0, vlan1, rss, l3_l4e;
+ const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
+ uint64x2_t rearm0, rearm1, rearm2, rearm3;
/* mask everything except RSS, flow director and VLAN flags
* bit2 is for VLAN tag, bit11 for flow director indication
const uint32x4_t rss_vlan_msk = {
0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804};
+ const uint32x4_t cksum_mask = {
+ PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_EIP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_EIP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_EIP_CKSUM_BAD,
+ PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_EIP_CKSUM_BAD};
+
/* map rss and vlan type to rss hash and vlan flag */
const uint8x16_t vlan_flags = {
0, 0, 0, 0,
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0};
0, 0, 0, 0};
const uint8x16_t l3_l4e_flags = {
- 0,
- PKT_RX_IP_CKSUM_BAD,
- PKT_RX_L4_CKSUM_BAD,
- PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
- PKT_RX_EIP_CKSUM_BAD,
- PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
- PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
- PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
+ (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1,
+ PKT_RX_IP_CKSUM_BAD >> 1,
+ (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
0, 0, 0, 0, 0, 0, 0, 0};
vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
l3_l4e = vshrq_n_u32(vlan1, 22);
l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags,
vreinterpretq_u8_u32(l3_l4e)));
-
+ /* then we shift left 1 bit */
+ l3_l4e = vshlq_n_u32(l3_l4e, 1);
+ /* we need to mask out the reduntant bits */
+ l3_l4e = vandq_u32(l3_l4e, cksum_mask);
vlan0 = vorrq_u32(vlan0, rss);
vlan0 = vorrq_u32(vlan0, l3_l4e);
- rx_pkts[0]->ol_flags = vgetq_lane_u32(vlan0, 0);
- rx_pkts[1]->ol_flags = vgetq_lane_u32(vlan0, 1);
- rx_pkts[2]->ol_flags = vgetq_lane_u32(vlan0, 2);
- rx_pkts[3]->ol_flags = vgetq_lane_u32(vlan0, 3);
+ rearm0 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 0), mbuf_init, 1);
+ rearm1 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 1), mbuf_init, 1);
+ rearm2 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 2), mbuf_init, 1);
+ rearm3 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 3), mbuf_init, 1);
+
+ vst1q_u64((uint64_t *)&rx_pkts[0]->rearm_data, rearm0);
+ vst1q_u64((uint64_t *)&rx_pkts[1]->rearm_data, rearm1);
+ vst1q_u64((uint64_t *)&rx_pkts[2]->rearm_data, rearm2);
+ vst1q_u64((uint64_t *)&rx_pkts[3]->rearm_data, rearm3);
}
-#else
-#define desc_to_olflags_v(descs, rx_pkts) do {} while (0)
-#endif
#define PKTLEN_SHIFT 10
-
-#define I40E_VPMD_DESC_DD_MASK 0x0001000100010001ULL
+#define I40E_UINT16_BIT (CHAR_BIT * sizeof(uint16_t))
static inline void
-desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts)
+desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts,
+ uint32_t *ptype_tbl)
{
int i;
uint8_t ptype;
for (i = 0; i < 4; i++) {
tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30));
ptype = vgetq_lane_u8(tmp, 8);
- rx_pkts[i]->packet_type = i40e_rxd_pkt_type_mapping(ptype);
+ rx_pkts[i]->packet_type = ptype_tbl[ptype];
}
}
struct i40e_rx_entry *sw_ring;
uint16_t nb_pkts_recd;
int pos;
- uint64_t var;
+ uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
/* mask to shuffle from desc. to mbuf */
uint8x16_t shuf_msk = {
/* Read desc statuses backwards to avoid race condition */
/* A.1 load 4 pkts desc */
descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
- rte_rmb();
/* B.2 copy 2 mbuf point into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
}
- /* avoid compiler reorder optimization */
- rte_compiler_barrier();
-
/* pkt 3,4 shift the pktlen field to be 16-bit aligned*/
uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
len_shl);
/* C.2 get 4 pkts staterr value */
staterr = vzipq_u16(sterr_tmp1.val[1],
sterr_tmp2.val[1]).val[0];
- stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);
- desc_to_olflags_v(descs, &rx_pkts[pos]);
+ desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
rx_pkts[pos + 3]->next = NULL;
}
+ staterr = vshlq_n_u16(staterr, I40E_UINT16_BIT - 1);
+ staterr = vreinterpretq_u16_s16(
+ vshrq_n_s16(vreinterpretq_s16_u16(staterr),
+ I40E_UINT16_BIT - 1));
+ stat = ~vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);
+
rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);
/* D.3 copy final 1,2 data to rx_pkts */
pkt_mb2);
vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1,
pkt_mb1);
- desc_to_ptype_v(descs, &rx_pkts[pos]);
+ desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
/* C.4 calc avaialbe number of desc */
- var = __builtin_popcountll(stat & I40E_VPMD_DESC_DD_MASK);
- nb_pkts_recd += var;
- if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+ if (unlikely(stat == 0)) {
+ nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
+ } else {
+ nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
break;
+ }
}
/* Update our internal tail pointer */
((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
- uint64x2_t descriptor = {pkt->buf_physaddr + pkt->data_off, high_qw};
+ uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off, high_qw};
vst1q_u64((uint64_t *)txdp, descriptor);
}