/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <stdint.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_vect.h>

#include "ixgbe_ethdev.h"
#include "ixgbe_rxtx.h"
#include "ixgbe_rxtx_vec_common.h"

#pragma GCC diagnostic ignored "-Wcast-qual"

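/*
 * Refill the RX ring with fresh mbufs: take RTE_IXGBE_RXQ_REARM_THRESH
 * buffers from the mempool in one bulk get, write their IOVA addresses into
 * the descriptors two at a time with NEON stores, then advance the RDT tail
 * register so the hardware can use the rearmed descriptors.
 */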
static inline void
ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);
	uint64_t paddr;
	uint8x8_t p;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
					  (void *)rxep,
					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_IXGBE_RXQ_REARM_THRESH;
		return;
	}

	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/*
		 * Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 */
		vst1_u8((uint8_t *)&mb0->rearm_data, p);
		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
		/* flush desc with pa dma_addr */
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

		vst1_u8((uint8_t *)&mb1->rearm_data, p);
		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
	}

	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;

	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}

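/*
 * Translate the packed status/error bytes of four descriptors into mbuf
 * ol_flags: RSS hash / flow-director, the VLAN flags supplied by the caller,
 * and IP/L4 checksum good/bad flags, with the UDP zero-checksum workaround
 * applied when udp_p_flag is set.
 */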
static inline void
desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
		  uint8x16_t staterr, uint8_t vlan_flags, uint16_t udp_p_flag,
		  struct rte_mbuf **rx_pkts)
{
	uint16_t udp_p_flag_hi;
	uint8x16_t ptype, udp_csum_skip;
	uint32x4_t temp_udp_csum_skip = {0, 0, 0, 0};
	uint8x16_t vtag_lo, vtag_hi, vtag;
	uint8x16_t temp_csum;
	uint32x4_t csum = {0, 0, 0, 0};

	union {
		uint16_t e[4];
		uint64_t word;
	} vol;

	const uint8x16_t rsstype_msk = {
			0x0F, 0x0F, 0x0F, 0x0F,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	const uint8x16_t rss_flags = {
			0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
			RTE_MBUF_F_RX_RSS_HASH,
			0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
			RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
			0, 0, 0, RTE_MBUF_F_RX_FDIR};

	/* mask everything except vlan present and l4/ip csum error */
	const uint8x16_t vlan_csum_msk = {
			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
			IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
			0, 0, 0, 0,
			0, 0, 0, 0,
			(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
			(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
			(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
			(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24};

	/* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
	const uint8x16_t vlan_csum_map_lo = {
			RTE_MBUF_F_RX_IP_CKSUM_GOOD,
			RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			RTE_MBUF_F_RX_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			0, 0, 0, 0,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			0, 0, 0, 0};

	const uint8x16_t vlan_csum_map_hi = {
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			0, 0, 0, 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			0, 0, 0, 0};

	/* change mask from 0x200(IXGBE_RXDADV_PKTTYPE_UDP) to 0x2 */
	udp_p_flag_hi = udp_p_flag >> 8;

	/* mask everything except UDP header present if specified */
	const uint8x16_t udp_hdr_p_msk = {
			0, 0, 0, 0,
			udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const uint8x16_t udp_csum_bad_shuf = {
			0xFF, ~(uint8_t)RTE_MBUF_F_RX_L4_CKSUM_BAD, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0};

	ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];

	/* save the UDP header present information */
	udp_csum_skip = vandq_u8(ptype, udp_hdr_p_msk);

	/* move UDP header present information to low 32 bits */
	temp_udp_csum_skip = vcopyq_laneq_u32(temp_udp_csum_skip, 0,
				vreinterpretq_u32_u8(udp_csum_skip), 1);

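	/* Look up per-packet RSS/flow-director flags: the low four bits of
	 * each packet's RSS-type byte index the rss_flags table.
	 */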
	ptype = vandq_u8(ptype, rsstype_msk);
	ptype = vqtbl1q_u8(rss_flags, ptype);

	/* extract vlan_flags and csum_error from staterr */
	vtag = vandq_u8(staterr, vlan_csum_msk);

	/* csum bits are in the most significant bits; to use shuffle we need
	 * to shift them. Change mask from 0xc0 to 0x03.
	 */
	temp_csum = vshrq_n_u8(vtag, 6);

	/* 'OR' the most significant 32 bits containing the checksum flags
	 * with the vlan present flags.
	 * The bit layout of each lane (8 bits) then becomes 'xxxx,VP,x,IPE,L4E'.
	 */
	csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0);
	vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag);

	/* convert L4 checksum correct type to vtag_hi */
	vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag);
	vtag_hi = vshrq_n_u8(vtag_hi, 7);

	/* convert VP, IPE, L4E to vtag_lo */
	vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag);
	vtag_lo = vorrq_u8(ptype, vtag_lo);

	/* convert the UDP header present 0x2 to 0x1 so it lines up with the
	 * RTE_MBUF_F_RX_L4_CKSUM_BAD value in the low byte of each 8-bit
	 * ol_flags word in vtag_lo (4x8). Then mask out the bad checksum
	 * value by shuffle and bit-mask.
	 */
	udp_csum_skip = vshrq_n_u8(vreinterpretq_u8_u32(temp_udp_csum_skip), 1);
	udp_csum_skip = vqtbl1q_u8(udp_csum_bad_shuf, udp_csum_skip);
	vtag_lo = vandq_u8(vtag_lo, udp_csum_skip);

	vtag = vzipq_u8(vtag_lo, vtag_hi).val[0];
	vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0);

	rx_pkts[0]->ol_flags = vol.e[0];
	rx_pkts[1]->ol_flags = vol.e[1];
	rx_pkts[2]->ol_flags = vol.e[2];
	rx_pkts[3]->ol_flags = vol.e[3];
}

#define IXGBE_VPMD_DESC_EOP_MASK	0x02020202
#define IXGBE_UINT8_BIT			(CHAR_BIT * sizeof(uint8_t))

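/*
 * Resolve the hardware packet-type field into an RTE_PTYPE_* value.
 * Packets that matched an ETQF filter are reported as RTE_PTYPE_UNKNOWN;
 * tunnelled packets use the tunnel ptype table, everything else the
 * plain 82599 table.
 */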
static inline uint32_t
get_packet_type(uint32_t pkt_info,
		uint32_t etqf_check,
		uint32_t tunnel_check)
{
	if (etqf_check)
		return RTE_PTYPE_UNKNOWN;

	if (tunnel_check) {
		pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
		return ptype_table_tn[pkt_info];
	}

	pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
	return ptype_table[pkt_info];
}

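/*
 * Gather the packet-type words of four descriptors into one vector, check
 * the ETQF and tunnel indication bits, and store the resolved packet_type
 * into each mbuf.
 */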
static inline void
desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
		struct rte_mbuf **rx_pkts)
{
	uint32x4_t etqf_check, tunnel_check;
	uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
	uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
	uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
	uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
				vreinterpretq_u32_u64(descs[2])).val[0];
	uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
				vreinterpretq_u32_u64(descs[3])).val[0];

	/* interleave low 32 bits,
	 * now we have 4 ptypes in a NEON register
	 */
	ptype0 = vzipq_u32(ptype0, ptype1).val[0];

	/* mask etqf bits */
	etqf_check = vandq_u32(ptype0, etqf_mask);
	/* mask tunnel bits */
	tunnel_check = vandq_u32(ptype0, tunnel_mask);

	/* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
	ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
			ptype_mask);

	rx_pkts[0]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 0),
				vgetq_lane_u32(etqf_check, 0),
				vgetq_lane_u32(tunnel_check, 0));
	rx_pkts[1]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 1),
				vgetq_lane_u32(etqf_check, 1),
				vgetq_lane_u32(tunnel_check, 1));
	rx_pkts[2]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 2),
				vgetq_lane_u32(etqf_check, 2),
				vgetq_lane_u32(tunnel_check, 2));
	rx_pkts[3]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 3),
				vgetq_lane_u32(etqf_check, 3),
				vgetq_lane_u32(tunnel_check, 3));
}

/**
 * vPMD raw receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP: no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	uint16_t pos;
	uint8x16_t shuf_msk = {
		0xFF, 0xFF,
		0xFF, 0xFF,  /* skip 32 bits pkt_type */
		12, 13,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		12, 13,      /* octet 12~13, 16 bits data_len */
		14, 15,      /* octet 14~15, low 16 bits vlan_macip */
		4, 5, 6, 7   /* octet 4~7, 32 bits rss */
		};
	uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
				 rxq->crc_len, 0, 0, 0};
	uint8_t vlan_flags;
	uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */

	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
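	/* The floor alignment only caps how many descriptors the loop below
	 * scans; fewer packets than nb_pkts may still be returned when not
	 * enough descriptors have the DD bit set.
	 */
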
	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch_non_temporal(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
		ixgbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.upper.status_error &
	      rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
		return 0;

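	/* The rx_udp_csum_zero_err flag marks devices that may report a false
	 * L4 checksum error for UDP packets carrying a zero checksum; passing
	 * the UDP packet-type bit down lets desc_to_olflags_v drop the
	 * bad-checksum flag for those packets.
	 */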
	if (rxq->rx_udp_csum_zero_err)
		udp_p_flag = IXGBE_RXDADV_PKTTYPE_UDP;

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* ensure these 2 flags are in the lower 8 bits */
	RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
	vlan_flags = rxq->vlan_flags & UINT8_MAX;

	/* A. load 4 packets' descriptors in one loop
	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_IXGBE_DESCS_PER_LOOP,
			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
		uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		uint8x16x2_t sterr_tmp1, sterr_tmp2;
		uint64x2_t mbp1, mbp2;
		uint8x16_t staterr;
		uint16x8_t tmp;
		uint32_t stat;

		/* B.1 load 2 mbuf pointers */
		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

		/* B.1 load 2 mbuf pointers */
		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

		/* A. load 4 pkts descs */
		descs[0] = vld1q_u64((uint64_t *)(rxdp));
		descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
		descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
		descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
				      vreinterpretq_u8_u64(descs[3]));
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
				      vreinterpretq_u8_u64(descs[2]));

		/* C.2 get 4 pkts staterr value */
		staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags,
				  udp_p_flag, &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
		pkt_mb4 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
		pkt_mb3 = vreinterpretq_u8_u16(tmp);

		/* D.3 copy final 3,4 data to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
			 pkt_mb4);
		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
			 pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
		pkt_mb2 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
		pkt_mb1 = vreinterpretq_u8_u16(tmp);

		/* C* extract and record EOP bit */
		if (split_packet) {
			stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
			/* and with mask to extract bits, flipping 1-0 */
			*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;

			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
		}

		/* C.4 expand DD bit to saturate UINT8 */
		staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
		staterr = vreinterpretq_u8_s8
				(vshrq_n_s8(vreinterpretq_s8_u8(staterr),
					IXGBE_UINT8_BIT - 1));
		stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);

		rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);

		/* D.3 copy final 1,2 data to rx_pkts */
		vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
			 pkt_mb2);
		vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
			 pkt_mb1);

		desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);

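		/* After C.4 each byte of 'stat' is 0x00 for a descriptor with
		 * DD set and 0xFF otherwise, so counting trailing zero bits
		 * and dividing by 8 yields the number of completed packets in
		 * this group of four.
		 */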
		/* C.5 calc available number of desc */
		if (unlikely(stat == 0)) {
			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
		} else {
			nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
			break;
		}
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/**
 * vPMD receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP: no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
uint16_t
ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP: no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static uint16_t
ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			       uint16_t nb_pkts)
{
	struct ixgbe_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
			split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;
	if (rxq->pkt_first_seg == NULL &&
			split_fl64[0] == 0 && split_fl64[1] == 0 &&
			split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
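	/* If the previous burst ended in the middle of a packet,
	 * rxq->pkt_first_seg still points at its first segment and reassembly
	 * resumes from index 0; otherwise skip ahead to the first buffer
	 * whose split flag is set.
	 */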
	unsigned int i = 0;
	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble then */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
		rxq->pkt_first_seg = rx_pkts[i];
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
		&split_flags[i]);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 */
uint16_t
ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts)
{
	uint16_t retval = 0;

	while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
		uint16_t burst;

		burst = ixgbe_recv_scattered_burst_vec(rx_queue,
						       rx_pkts + retval,
						       RTE_IXGBE_MAX_RX_BURST);
		retval += burst;
		nb_pkts -= burst;
		if (burst < RTE_IXGBE_MAX_RX_BURST)
			return retval;
	}

	return retval + ixgbe_recv_scattered_burst_vec(rx_queue,
						       rx_pkts + retval,
						       nb_pkts);
}

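/*
 * Write one 16-byte advanced TX descriptor: the buffer IOVA goes into the
 * first quadword; the second quadword packs data_len as the buffer length,
 * the command/type flags, and pkt_len shifted by 46 into the PAYLEN field
 * of olinfo_status.
 */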
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	uint64x2_t descriptor = {
			pkt->buf_iova + pkt->data_off,
			(uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};

	vst1q_u64((uint64_t *)&txdp->read, descriptor);
}

static inline void
vtx(volatile union ixgbe_adv_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

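/*
 * Vector TX burst: free completed descriptors when the free count drops
 * below tx_free_thresh, cap the request at tx_rs_thresh and at the number of
 * descriptors actually free, write the descriptors (splitting the copy at
 * the ring wrap point), request report status at the tx_rs_thresh boundary,
 * and bump the TDT tail register.
 */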
uint16_t
ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t nb_pkts)
{
	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
	volatile union ixgbe_adv_tx_desc *txdp;
	struct ixgbe_tx_entry_v *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = DCMD_DTYP_FLAGS;
	uint64_t rs = IXGBE_ADVTXD_DCMD_RS | DCMD_DTYP_FLAGS;
	int i;

	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		ixgbe_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring_v[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring_v[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
			txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

	return nb_pkts;
}

static void __rte_cold
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_queue_release_mbufs_vec(txq);
}

void __rte_cold
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
	_ixgbe_rx_queue_release_mbufs_vec(rxq);
}

static void __rte_cold
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_free_swring_vec(txq);
}

static void __rte_cold
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
	_ixgbe_reset_tx_queue_vec(txq);
}

static const struct ixgbe_txq_ops vec_txq_ops = {
	.release_mbufs = ixgbe_tx_queue_release_mbufs_vec,
	.free_swring = ixgbe_tx_free_swring,
	.reset = ixgbe_reset_tx_queue,
};

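/* The remaining setup and configuration-check hooks simply delegate to the
 * helpers shared with the other vector implementations in
 * ixgbe_rxtx_vec_common.h.
 */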
int __rte_cold
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
	return ixgbe_rxq_vec_setup_default(rxq);
}

int __rte_cold
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
	return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
}

int __rte_cold
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
}