4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
83 /* Bit mask indicating which ol_flags bits require building a TX context */
84 #define IXGBE_TX_OFFLOAD_MASK ( \
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
95 m = __rte_mbuf_raw_alloc(mp);
96 __rte_mbuf_sanity_check_raw(m, 0);
102 #define RTE_PMD_USE_PREFETCH
105 #ifdef RTE_PMD_USE_PREFETCH
107 * Prefetch a cache line into all cache levels.
109 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
111 #define rte_ixgbe_prefetch(p) do {} while(0)
114 /*********************************************************************
118 **********************************************************************/
121 * Check for descriptors with their DD bit set and free mbufs.
122 * Return the total number of buffers freed.
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
127 struct ixgbe_tx_entry *txep;
131 /* check DD bit on threshold descriptor */
132 status = txq->tx_ring[txq->tx_next_dd].wb.status;
133 if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
137 * first buffer to free from S/W ring is at index
138 * tx_next_dd - (tx_rs_thresh-1)
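* (Illustrative example, values assumed: with tx_rs_thresh = 32 and
* tx_next_dd = 31, freeing starts at sw_ring[0] and covers one full
* threshold's worth of entries.)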
140 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142 /* free buffers one at a time */
143 if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145 txep->mbuf->next = NULL;
146 rte_mempool_put(txep->mbuf->pool, txep->mbuf);
150 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151 rte_pktmbuf_free_seg(txep->mbuf);
156 /* buffers were freed, update counters */
157 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159 if (txq->tx_next_dd >= txq->nb_tx_desc)
160 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
162 return txq->tx_rs_thresh;
165 /* Populate 4 descriptors with data from 4 mbufs */
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
169 uint64_t buf_dma_addr;
173 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
175 pkt_len = (*pkts)->data_len;
177 /* write data to descriptor */
178 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
180 txdp->read.cmd_type_len =
181 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
183 txdp->read.olinfo_status =
184 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
186 rte_prefetch0(&(*pkts)->pool);
190 /* Populate 1 descriptor with data from 1 mbuf */
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
194 uint64_t buf_dma_addr;
197 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
198 pkt_len = (*pkts)->data_len;
200 /* write data to descriptor */
201 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202 txdp->read.cmd_type_len =
203 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204 txdp->read.olinfo_status =
205 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206 rte_prefetch0(&(*pkts)->pool);
210 * Fill H/W descriptor ring with mbuf data.
211 * Copy mbuf pointers to the S/W ring.
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
217 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218 struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219 const int N_PER_LOOP = 4;
220 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221 int mainpart, leftover;
225 * Process most of the packets in chunks of N pkts. Any
226 * leftover packets will get processed one at a time.
228 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
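/*
* Illustrative example (values assumed, not from the original code):
* with N_PER_LOOP = 4 and nb_pkts = 11,
* mainpart = 11 & ~3 = 8 (handled by two tx4() calls) and
* leftover = 11 & 3 = 3 (handled by three tx1() calls).
*/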
230 for (i = 0; i < mainpart; i += N_PER_LOOP) {
231 /* Copy N mbuf pointers to the S/W ring */
232 for (j = 0; j < N_PER_LOOP; ++j) {
233 (txep + i + j)->mbuf = *(pkts + i + j);
235 tx4(txdp + i, pkts + i);
238 if (unlikely(leftover > 0)) {
239 for (i = 0; i < leftover; ++i) {
240 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241 tx1(txdp + mainpart + i, pkts + mainpart + i);
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
250 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
255 * Begin scanning the H/W ring for done descriptors when the
256 * number of available descriptors drops below tx_free_thresh. For
257 * each done descriptor, free the associated buffer.
259 if (txq->nb_tx_free < txq->tx_free_thresh)
260 ixgbe_tx_free_bufs(txq);
262 /* Only use descriptors that are available */
263 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264 if (unlikely(nb_pkts == 0))
267 /* Use exactly nb_pkts descriptors */
268 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
271 * At this point, we know there are enough descriptors in the
272 * ring to transmit all the packets. This assumes that each
273 * mbuf contains a single segment, and that no new offloads
274 * are expected, which would require a new context descriptor.
278 * See if we're going to wrap around. If so, handle the top
279 * of the descriptor ring first, then do the bottom. If not,
280 * the processing looks just like the "bottom" part anyway...
282 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
287 * We know that the last descriptor in the ring will need to
288 * have its RS bit set because tx_rs_thresh has to be
289 * a divisor of the ring size
291 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
298 /* Fill H/W descriptor ring with mbuf data */
299 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
303 * Determine if RS bit should be set
304 * This is what we actually want:
305 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306 * but instead of subtracting 1 and doing >=, we can just do
307 * greater than without subtracting.
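*
* Illustrative example (values assumed): with tx_next_rs = 31 and
* tx_tail advanced to 35, "tx_tail > tx_next_rs" holds, which is the
* same test as "(tx_tail - 1) >= tx_next_rs".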
309 if (txq->tx_tail > txq->tx_next_rs) {
310 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
314 if (txq->tx_next_rs >= txq->nb_tx_desc)
315 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
319 * Check for wrap-around. This would only happen if we used
320 * up to the last descriptor in the ring, no more, no less.
322 if (txq->tx_tail >= txq->nb_tx_desc)
325 /* update tail pointer */
327 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
338 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
339 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
342 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
346 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348 nb_tx = (uint16_t)(nb_tx + ret);
349 nb_pkts = (uint16_t)(nb_pkts - ret);
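/*
* Sketch of the intended chunking (illustrative values): with
* RTE_PMD_IXGBE_TX_MAX_BURST = 32 and nb_pkts = 90, tx_xmit_pkts() is
* called with bursts of 32, 32 and 26 packets; the loop stops early if
* a call transmits fewer packets than requested (ring full).
*/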
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
362 uint32_t type_tucmd_mlhl;
363 uint32_t mss_l4len_idx = 0;
365 uint32_t vlan_macip_lens;
366 union ixgbe_tx_offload tx_offload_mask;
368 ctx_idx = txq->ctx_curr;
369 tx_offload_mask.data = 0;
372 /* Specify which HW CTX to upload. */
373 mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
375 if (ol_flags & PKT_TX_VLAN_PKT) {
376 tx_offload_mask.vlan_tci |= ~0;
379 /* check if TCP segmentation is required for this packet */
380 if (ol_flags & PKT_TX_TCP_SEG) {
381 /* implies IP cksum in IPv4 */
382 if (ol_flags & PKT_TX_IP_CKSUM)
383 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
387 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391 tx_offload_mask.l2_len |= ~0;
392 tx_offload_mask.l3_len |= ~0;
393 tx_offload_mask.l4_len |= ~0;
394 tx_offload_mask.tso_segsz |= ~0;
395 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397 } else { /* no TSO, check if hardware checksum is needed */
398 if (ol_flags & PKT_TX_IP_CKSUM) {
399 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400 tx_offload_mask.l2_len |= ~0;
401 tx_offload_mask.l3_len |= ~0;
404 switch (ol_flags & PKT_TX_L4_MASK) {
405 case PKT_TX_UDP_CKSUM:
406 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409 tx_offload_mask.l2_len |= ~0;
410 tx_offload_mask.l3_len |= ~0;
412 case PKT_TX_TCP_CKSUM:
413 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416 tx_offload_mask.l2_len |= ~0;
417 tx_offload_mask.l3_len |= ~0;
419 case PKT_TX_SCTP_CKSUM:
420 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423 tx_offload_mask.l2_len |= ~0;
424 tx_offload_mask.l3_len |= ~0;
427 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433 txq->ctx_cache[ctx_idx].flags = ol_flags;
434 txq->ctx_cache[ctx_idx].tx_offload.data =
435 tx_offload_mask.data & tx_offload.data;
436 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
438 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
439 vlan_macip_lens = tx_offload.l3_len;
440 vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
441 vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
442 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
443 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
444 ctx_txd->seqnum_seed = 0;
448 * Check which hardware context can be used. Use the existing match
449 * or create a new context descriptor.
451 static inline uint32_t
452 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
453 union ixgbe_tx_offload tx_offload)
455 /* Check if it matches the currently used context */
456 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
457 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
458 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
459 return txq->ctx_curr;
462 /* Check whether it matches the next cached context */
464 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
465 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
466 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
467 return txq->ctx_curr;
470 /* Mismatch: a new context descriptor must be built */
471 return (IXGBE_CTX_NUM);
474 static inline uint32_t
475 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
478 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
479 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
480 if (ol_flags & PKT_TX_IP_CKSUM)
481 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
482 if (ol_flags & PKT_TX_TCP_SEG)
483 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
487 static inline uint32_t
488 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
490 uint32_t cmdtype = 0;
491 if (ol_flags & PKT_TX_VLAN_PKT)
492 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
493 if (ol_flags & PKT_TX_TCP_SEG)
494 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
498 /* Default RS bit threshold values */
499 #ifndef DEFAULT_TX_RS_THRESH
500 #define DEFAULT_TX_RS_THRESH 32
502 #ifndef DEFAULT_TX_FREE_THRESH
503 #define DEFAULT_TX_FREE_THRESH 32
506 /* Reset transmit descriptors after they have been used */
508 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
510 struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
511 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
512 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
513 uint16_t nb_tx_desc = txq->nb_tx_desc;
514 uint16_t desc_to_clean_to;
515 uint16_t nb_tx_to_clean;
518 /* Determine the last descriptor needing to be cleaned */
519 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
520 if (desc_to_clean_to >= nb_tx_desc)
521 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
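/*
* Worked example (illustrative values): with nb_tx_desc = 512,
* last_desc_cleaned = 500 and tx_rs_thresh = 32, the raw value 532
* wraps to desc_to_clean_to = 532 - 512 = 20.
*/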
523 /* Check to make sure the last descriptor to clean is done */
524 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
525 status = txr[desc_to_clean_to].wb.status;
526 if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
528 PMD_TX_FREE_LOG(DEBUG,
529 "TX descriptor %4u is not done"
530 "(port=%d queue=%d)",
532 txq->port_id, txq->queue_id);
533 /* Failed to clean any descriptors, better luck next time */
537 /* Figure out how many descriptors will be cleaned */
538 if (last_desc_cleaned > desc_to_clean_to)
539 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
542 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
545 PMD_TX_FREE_LOG(DEBUG,
546 "Cleaning %4u TX descriptors: %4u to %4u "
547 "(port=%d queue=%d)",
548 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
549 txq->port_id, txq->queue_id);
552 * The last descriptor to clean is done, so that means all the
553 * descriptors from the last descriptor that was cleaned
554 * up to the last descriptor with the RS bit set
555 * are done. Only reset the threshold descriptor.
557 txr[desc_to_clean_to].wb.status = 0;
559 /* Update the txq to reflect the last descriptor that was cleaned */
560 txq->last_desc_cleaned = desc_to_clean_to;
561 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
568 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
571 struct ixgbe_tx_queue *txq;
572 struct ixgbe_tx_entry *sw_ring;
573 struct ixgbe_tx_entry *txe, *txn;
574 volatile union ixgbe_adv_tx_desc *txr;
575 volatile union ixgbe_adv_tx_desc *txd, *txp;
576 struct rte_mbuf *tx_pkt;
577 struct rte_mbuf *m_seg;
578 uint64_t buf_dma_addr;
579 uint32_t olinfo_status;
580 uint32_t cmd_type_len;
591 union ixgbe_tx_offload tx_offload = {0};
594 sw_ring = txq->sw_ring;
596 tx_id = txq->tx_tail;
597 txe = &sw_ring[tx_id];
600 /* Determine if the descriptor ring needs to be cleaned. */
601 if (txq->nb_tx_free < txq->tx_free_thresh)
602 ixgbe_xmit_cleanup(txq);
604 rte_prefetch0(&txe->mbuf->pool);
607 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
610 pkt_len = tx_pkt->pkt_len;
613 * Determine how many (if any) context descriptors
614 * are needed for offload functionality.
616 ol_flags = tx_pkt->ol_flags;
618 /* If hardware offload required */
619 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
621 tx_offload.l2_len = tx_pkt->l2_len;
622 tx_offload.l3_len = tx_pkt->l3_len;
623 tx_offload.l4_len = tx_pkt->l4_len;
624 tx_offload.vlan_tci = tx_pkt->vlan_tci;
625 tx_offload.tso_segsz = tx_pkt->tso_segsz;
627 /* Either build a new context descriptor or reuse the existing one. */
628 ctx = what_advctx_update(txq, tx_ol_req,
630 /* Only allocate a context descriptor if required */
631 new_ctx = (ctx == IXGBE_CTX_NUM);
636 * Keep track of how many descriptors are used this loop
637 * This will always be the number of segments + the number of
638 * Context descriptors required to transmit the packet
640 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
643 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
644 /* set RS on the previous packet in the burst */
645 txp->read.cmd_type_len |=
646 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
649 * The number of descriptors that must be allocated for a
650 * packet is the number of segments of that packet, plus 1
651 * Context Descriptor for the hardware offload, if any.
652 * Determine the last TX descriptor to allocate in the TX ring
653 * for the packet, starting from the current position (tx_id)
656 tx_last = (uint16_t) (tx_id + nb_used - 1);
659 if (tx_last >= txq->nb_tx_desc)
660 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
662 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
663 " tx_first=%u tx_last=%u",
664 (unsigned) txq->port_id,
665 (unsigned) txq->queue_id,
671 * Make sure there are enough TX descriptors available to
672 * transmit the entire packet.
673 * nb_used better be less than or equal to txq->tx_rs_thresh
675 if (nb_used > txq->nb_tx_free) {
676 PMD_TX_FREE_LOG(DEBUG,
677 "Not enough free TX descriptors "
678 "nb_used=%4u nb_free=%4u "
679 "(port=%d queue=%d)",
680 nb_used, txq->nb_tx_free,
681 txq->port_id, txq->queue_id);
683 if (ixgbe_xmit_cleanup(txq) != 0) {
684 /* Could not clean any descriptors */
690 /* nb_used better be <= txq->tx_rs_thresh */
691 if (unlikely(nb_used > txq->tx_rs_thresh)) {
692 PMD_TX_FREE_LOG(DEBUG,
693 "The number of descriptors needed to "
694 "transmit the packet exceeds the "
695 "RS bit threshold. This will impact "
697 "nb_used=%4u nb_free=%4u "
699 "(port=%d queue=%d)",
700 nb_used, txq->nb_tx_free,
702 txq->port_id, txq->queue_id);
704 * Loop here until there are enough TX
705 * descriptors or until the ring cannot be
708 while (nb_used > txq->nb_tx_free) {
709 if (ixgbe_xmit_cleanup(txq) != 0) {
711 * Could not clean any
723 * By now there are enough free TX descriptors to transmit
728 * Set common flags of all TX Data Descriptors.
730 * The following bits must be set in all Data Descriptors:
731 * - IXGBE_ADVTXD_DTYP_DATA
732 * - IXGBE_ADVTXD_DCMD_DEXT
734 * The following bits must be set in the first Data Descriptor
735 * and are ignored in the other ones:
736 * - IXGBE_ADVTXD_DCMD_IFCS
737 * - IXGBE_ADVTXD_MAC_1588
738 * - IXGBE_ADVTXD_DCMD_VLE
740 * The following bits must only be set in the last Data
742 * - IXGBE_TXD_CMD_EOP
744 * The following bits can be set in any Data Descriptor, but
745 * are only set in the last Data Descriptor:
748 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
749 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
751 #ifdef RTE_LIBRTE_IEEE1588
752 if (ol_flags & PKT_TX_IEEE1588_TMST)
753 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
759 if (ol_flags & PKT_TX_TCP_SEG) {
760 /* when TSO is on, the paylen in the descriptor is
761 * not the packet len but the TCP payload len */
762 pkt_len -= (tx_offload.l2_len +
763 tx_offload.l3_len + tx_offload.l4_len);
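/*
* e.g. (illustrative): l2_len = 14, l3_len = 20 and l4_len = 20 on a
* 1514-byte frame leave a TSO paylen of 1460 bytes of TCP payload.
*/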
767 * Setup the TX Advanced Context Descriptor if required
770 volatile struct ixgbe_adv_tx_context_desc *
773 ctx_txd = (volatile struct
774 ixgbe_adv_tx_context_desc *)
777 txn = &sw_ring[txe->next_id];
778 rte_prefetch0(&txn->mbuf->pool);
780 if (txe->mbuf != NULL) {
781 rte_pktmbuf_free_seg(txe->mbuf);
785 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
788 txe->last_id = tx_last;
789 tx_id = txe->next_id;
794 * Set up the TX Advanced Data Descriptor.
795 * This path is taken whether a new context
796 * descriptor was built or an existing one is reused.
798 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
799 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
800 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
803 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
808 txn = &sw_ring[txe->next_id];
809 rte_prefetch0(&txn->mbuf->pool);
811 if (txe->mbuf != NULL)
812 rte_pktmbuf_free_seg(txe->mbuf);
816 * Set up Transmit Data Descriptor.
818 slen = m_seg->data_len;
819 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
820 txd->read.buffer_addr =
821 rte_cpu_to_le_64(buf_dma_addr);
822 txd->read.cmd_type_len =
823 rte_cpu_to_le_32(cmd_type_len | slen);
824 txd->read.olinfo_status =
825 rte_cpu_to_le_32(olinfo_status);
826 txe->last_id = tx_last;
827 tx_id = txe->next_id;
830 } while (m_seg != NULL);
833 * The last packet data descriptor needs End Of Packet (EOP)
835 cmd_type_len |= IXGBE_TXD_CMD_EOP;
836 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
837 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
839 /* Set RS bit only on threshold packets' last descriptor */
840 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
841 PMD_TX_FREE_LOG(DEBUG,
842 "Setting RS bit on TXD id="
843 "%4u (port=%d queue=%d)",
844 tx_last, txq->port_id, txq->queue_id);
846 cmd_type_len |= IXGBE_TXD_CMD_RS;
848 /* Update txq RS bit counters */
854 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
858 /* set RS on last packet in the burst */
860 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
865 * Set the Transmit Descriptor Tail (TDT)
867 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
868 (unsigned) txq->port_id, (unsigned) txq->queue_id,
869 (unsigned) tx_id, (unsigned) nb_tx);
870 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
871 txq->tx_tail = tx_id;
876 /*********************************************************************
880 **********************************************************************/
881 #define IXGBE_PACKET_TYPE_IPV4 0X01
882 #define IXGBE_PACKET_TYPE_IPV4_TCP 0X11
883 #define IXGBE_PACKET_TYPE_IPV4_UDP 0X21
884 #define IXGBE_PACKET_TYPE_IPV4_SCTP 0X41
885 #define IXGBE_PACKET_TYPE_IPV4_EXT 0X03
886 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP 0X43
887 #define IXGBE_PACKET_TYPE_IPV6 0X04
888 #define IXGBE_PACKET_TYPE_IPV6_TCP 0X14
889 #define IXGBE_PACKET_TYPE_IPV6_UDP 0X24
890 #define IXGBE_PACKET_TYPE_IPV6_EXT 0X0C
891 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP 0X1C
892 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP 0X2C
893 #define IXGBE_PACKET_TYPE_IPV4_IPV6 0X05
894 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP 0X15
895 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP 0X25
896 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
897 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
898 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
899 #define IXGBE_PACKET_TYPE_MAX 0X80
900 #define IXGBE_PACKET_TYPE_MASK 0X7F
901 #define IXGBE_PACKET_TYPE_SHIFT 0X04
902 static inline uint32_t
903 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
905 static const uint32_t
906 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
907 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
909 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
910 RTE_PTYPE_L3_IPV4_EXT,
911 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
913 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
914 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
915 RTE_PTYPE_INNER_L3_IPV6,
916 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
917 RTE_PTYPE_L3_IPV6_EXT,
918 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
919 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
920 RTE_PTYPE_INNER_L3_IPV6_EXT,
921 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
922 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
923 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
924 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
925 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
926 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
927 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
928 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
929 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
930 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
931 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
932 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
933 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
934 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
935 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
936 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
937 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
938 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
939 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
940 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
941 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
942 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
943 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
944 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
945 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
946 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
947 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
948 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
950 if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
951 return RTE_PTYPE_UNKNOWN;
953 pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
954 IXGBE_PACKET_TYPE_MASK;
956 return ptype_table[pkt_info];
959 static inline uint64_t
960 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
962 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
963 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
964 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
965 PKT_RX_RSS_HASH, 0, 0, 0,
966 0, 0, 0, PKT_RX_FDIR,
968 #ifdef RTE_LIBRTE_IEEE1588
969 static uint64_t ip_pkt_etqf_map[8] = {
970 0, 0, 0, PKT_RX_IEEE1588_PTP,
974 if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
975 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
976 ip_rss_types_map[pkt_info & 0XF];
978 return ip_rss_types_map[pkt_info & 0XF];
980 return ip_rss_types_map[pkt_info & 0XF];
984 static inline uint64_t
985 rx_desc_status_to_pkt_flags(uint32_t rx_status)
990 * Check only whether a VLAN is present.
991 * Do not check whether the L3/L4 RX checksum was done by the NIC;
992 * that can be found from the rte_eth_rxmode.hw_ip_checksum flag
994 pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
996 #ifdef RTE_LIBRTE_IEEE1588
997 if (rx_status & IXGBE_RXD_STAT_TMST)
998 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1003 static inline uint64_t
1004 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1007 * Bit 31: IPE, IPv4 checksum error
1008 * Bit 30: L4I, L4 integrity error
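*
* Example (illustrative): a status word with only bit 30 set indexes
* entry 1 of the table below (PKT_RX_L4_CKSUM_BAD), bit 31 alone
* indexes entry 2 (PKT_RX_IP_CKSUM_BAD), and both bits set index
* entry 3 (both flags).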
1010 static uint64_t error_to_pkt_flags_map[4] = {
1011 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1012 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1014 return error_to_pkt_flags_map[(rx_status >>
1015 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1019 * LOOK_AHEAD defines how many desc statuses to check beyond the
1020 * current descriptor.
1021 * It must be a pound define for optimal performance.
1022 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1023 * function only works with LOOK_AHEAD=8.
1025 #define LOOK_AHEAD 8
1026 #if (LOOK_AHEAD != 8)
1027 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1030 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1032 volatile union ixgbe_adv_rx_desc *rxdp;
1033 struct ixgbe_rx_entry *rxep;
1034 struct rte_mbuf *mb;
1038 uint32_t s[LOOK_AHEAD];
1039 uint16_t pkt_info[LOOK_AHEAD];
1040 int i, j, nb_rx = 0;
1043 /* get references to current descriptor and S/W ring entry */
1044 rxdp = &rxq->rx_ring[rxq->rx_tail];
1045 rxep = &rxq->sw_ring[rxq->rx_tail];
1047 status = rxdp->wb.upper.status_error;
1048 /* check to make sure there is at least 1 packet to receive */
1049 if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1053 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1054 * reference packets that are ready to be received.
1056 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1057 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1059 /* Read desc statuses backwards to avoid race condition */
1060 for (j = LOOK_AHEAD-1; j >= 0; --j)
1061 s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1063 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1064 pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1067 /* Compute how many status bits were set */
1069 for (j = 0; j < LOOK_AHEAD; ++j)
1070 nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
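/*
* Explanatory note: IXGBE_RXDADV_STAT_DD is bit 0, so summing
* s[j] & IXGBE_RXDADV_STAT_DD over the LOOK_AHEAD slots counts how
* many of the scanned descriptors have been written back; as
* descriptors complete in order, this is effectively the number of
* leading done descriptors.
*/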
1074 /* Translate descriptor info to mbuf format */
1075 for (j = 0; j < nb_dd; ++j) {
1077 pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1079 mb->data_len = pkt_len;
1080 mb->pkt_len = pkt_len;
1081 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1083 /* convert descriptor fields to rte mbuf flags */
1084 pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1085 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1087 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1088 mb->ol_flags = pkt_flags;
1090 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1092 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1093 mb->hash.rss = rte_le_to_cpu_32(
1094 rxdp[j].wb.lower.hi_dword.rss);
1095 else if (pkt_flags & PKT_RX_FDIR) {
1096 mb->hash.fdir.hash = rte_le_to_cpu_16(
1097 rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1098 IXGBE_ATR_HASH_MASK;
1099 mb->hash.fdir.id = rte_le_to_cpu_16(
1100 rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1104 /* Move mbuf pointers from the S/W ring to the stage */
1105 for (j = 0; j < LOOK_AHEAD; ++j) {
1106 rxq->rx_stage[i + j] = rxep[j].mbuf;
1109 /* stop scanning if not all LOOK_AHEAD descriptors were done */
1110 if (nb_dd != LOOK_AHEAD)
1114 /* clear software ring entries so we can clean up correctly */
1115 for (i = 0; i < nb_rx; ++i) {
1116 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1124 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1126 volatile union ixgbe_adv_rx_desc *rxdp;
1127 struct ixgbe_rx_entry *rxep;
1128 struct rte_mbuf *mb;
1133 /* allocate buffers in bulk directly into the S/W ring */
1134 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1135 rxep = &rxq->sw_ring[alloc_idx];
1136 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1137 rxq->rx_free_thresh);
1138 if (unlikely(diag != 0))
1141 rxdp = &rxq->rx_ring[alloc_idx];
1142 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1143 /* populate the static rte mbuf fields */
1148 mb->port = rxq->port_id;
1151 rte_mbuf_refcnt_set(mb, 1);
1152 mb->data_off = RTE_PKTMBUF_HEADROOM;
1154 /* populate the descriptors */
1155 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
1156 rxdp[i].read.hdr_addr = 0;
1157 rxdp[i].read.pkt_addr = dma_addr;
1160 /* update state of internal queue structure */
1161 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1162 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1163 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1169 static inline uint16_t
1170 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1173 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1176 /* how many packets are ready to return? */
1177 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1179 /* copy mbuf pointers to the application's packet list */
1180 for (i = 0; i < nb_pkts; ++i)
1181 rx_pkts[i] = stage[i];
1183 /* update internal queue state */
1184 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1185 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1190 static inline uint16_t
1191 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1194 struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1197 /* Any previously recv'd pkts will be returned from the Rx stage */
1198 if (rxq->rx_nb_avail)
1199 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1201 /* Scan the H/W ring for packets to receive */
1202 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1204 /* update internal queue state */
1205 rxq->rx_next_avail = 0;
1206 rxq->rx_nb_avail = nb_rx;
1207 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1209 /* if required, allocate new buffers to replenish descriptors */
1210 if (rxq->rx_tail > rxq->rx_free_trigger) {
1211 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1213 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1215 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1216 "queue_id=%u", (unsigned) rxq->port_id,
1217 (unsigned) rxq->queue_id);
1219 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1220 rxq->rx_free_thresh;
1223 * Need to rewind any previous receives if we cannot
1224 * allocate new buffers to replenish the old ones.
1226 rxq->rx_nb_avail = 0;
1227 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1228 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1229 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1234 /* update tail pointer */
1236 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1239 if (rxq->rx_tail >= rxq->nb_rx_desc)
1242 /* received any packets this loop? */
1243 if (rxq->rx_nb_avail)
1244 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1249 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1251 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1256 if (unlikely(nb_pkts == 0))
1259 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1260 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1262 /* request is relatively large, chunk it up */
1266 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1267 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1268 nb_rx = (uint16_t)(nb_rx + ret);
1269 nb_pkts = (uint16_t)(nb_pkts - ret);
1278 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1281 struct ixgbe_rx_queue *rxq;
1282 volatile union ixgbe_adv_rx_desc *rx_ring;
1283 volatile union ixgbe_adv_rx_desc *rxdp;
1284 struct ixgbe_rx_entry *sw_ring;
1285 struct ixgbe_rx_entry *rxe;
1286 struct rte_mbuf *rxm;
1287 struct rte_mbuf *nmb;
1288 union ixgbe_adv_rx_desc rxd;
1301 rx_id = rxq->rx_tail;
1302 rx_ring = rxq->rx_ring;
1303 sw_ring = rxq->sw_ring;
1304 while (nb_rx < nb_pkts) {
1306 * The order of operations here is important as the DD status
1307 * bit must not be read after any other descriptor fields.
1308 * rx_ring and rxdp are pointing to volatile data so the order
1309 * of accesses cannot be reordered by the compiler. If they were
1310 * not volatile, they could be reordered which could lead to
1311 * using invalid descriptor fields when read from rxd.
1313 rxdp = &rx_ring[rx_id];
1314 staterr = rxdp->wb.upper.status_error;
1315 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1322 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1323 * is likely to be invalid and to be dropped by the various
1324 * validation checks performed by the network stack.
1326 * Allocate a new mbuf to replenish the RX ring descriptor.
1327 * If the allocation fails:
1328 * - arrange for that RX descriptor to be the first one
1329 * being parsed the next time the receive function is
1330 * invoked [on the same queue].
1332 * - Stop parsing the RX ring and return immediately.
1334 * This policy does not drop the packet received in the RX
1335 * descriptor for which the allocation of a new mbuf failed.
1336 * Thus, it allows that packet to be retrieved later if
1337 * mbufs have been freed in the meantime.
1338 * As a side effect, holding RX descriptors instead of
1339 * systematically giving them back to the NIC may lead to
1340 * RX ring exhaustion situations.
1341 * However, the NIC can gracefully prevent such situations
1342 * from happening by sending specific "back-pressure" flow control
1343 * frames to its peer(s).
1345 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1346 "ext_err_stat=0x%08x pkt_len=%u",
1347 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1348 (unsigned) rx_id, (unsigned) staterr,
1349 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1351 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1353 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1354 "queue_id=%u", (unsigned) rxq->port_id,
1355 (unsigned) rxq->queue_id);
1356 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1361 rxe = &sw_ring[rx_id];
1363 if (rx_id == rxq->nb_rx_desc)
1366 /* Prefetch next mbuf while processing current one. */
1367 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1370 * When next RX descriptor is on a cache-line boundary,
1371 * prefetch the next 4 RX descriptors and the next 8 pointers
1374 if ((rx_id & 0x3) == 0) {
1375 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1376 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1382 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1383 rxdp->read.hdr_addr = 0;
1384 rxdp->read.pkt_addr = dma_addr;
1387 * Initialize the returned mbuf.
1388 * 1) setup generic mbuf fields:
1389 * - number of segments,
1392 * - RX port identifier.
1393 * 2) integrate hardware offload data, if any:
1394 * - RSS flag & hash,
1395 * - IP checksum flag,
1396 * - VLAN TCI, if any,
1399 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1401 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1402 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1405 rxm->pkt_len = pkt_len;
1406 rxm->data_len = pkt_len;
1407 rxm->port = rxq->port_id;
1409 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1411 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1412 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1414 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1415 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1416 pkt_flags = pkt_flags |
1417 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1418 rxm->ol_flags = pkt_flags;
1419 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1421 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1422 rxm->hash.rss = rte_le_to_cpu_32(
1423 rxd.wb.lower.hi_dword.rss);
1424 else if (pkt_flags & PKT_RX_FDIR) {
1425 rxm->hash.fdir.hash = rte_le_to_cpu_16(
1426 rxd.wb.lower.hi_dword.csum_ip.csum) &
1427 IXGBE_ATR_HASH_MASK;
1428 rxm->hash.fdir.id = rte_le_to_cpu_16(
1429 rxd.wb.lower.hi_dword.csum_ip.ip_id);
1432 * Store the mbuf address into the next entry of the array
1433 * of returned packets.
1435 rx_pkts[nb_rx++] = rxm;
1437 rxq->rx_tail = rx_id;
1440 * If the number of free RX descriptors is greater than the RX free
1441 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1443 * Update the RDT with the value of the last processed RX descriptor
1444 * minus 1, to guarantee that the RDT register is never equal to the
1445 * RDH register, which creates a "full" ring situtation from the
1446 * hardware point of view...
1448 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1449 if (nb_hold > rxq->rx_free_thresh) {
1450 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1451 "nb_hold=%u nb_rx=%u",
1452 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1453 (unsigned) rx_id, (unsigned) nb_hold,
1455 rx_id = (uint16_t) ((rx_id == 0) ?
1456 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1457 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1460 rxq->nb_rx_hold = nb_hold;
1465 * Detect an RSC descriptor.
1467 static inline uint32_t
1468 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1470 return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1471 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1475 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1477 * Fill the following info in the HEAD buffer of the Rx cluster:
1478 * - RX port identifier
1479 * - hardware offload data, if any:
1481 * - IP checksum flag
1482 * - VLAN TCI, if any
1484 * @head HEAD of the packet cluster
1485 * @desc HW descriptor to get data from
1486 * @port_id Port ID of the Rx queue
1489 ixgbe_fill_cluster_head_buf(
1490 struct rte_mbuf *head,
1491 union ixgbe_adv_rx_desc *desc,
1498 head->port = port_id;
1500 /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1501 * set in the pkt_flags field.
1503 head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1504 pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1505 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1506 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1507 pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1508 head->ol_flags = pkt_flags;
1509 head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1511 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1512 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1513 else if (pkt_flags & PKT_RX_FDIR) {
1514 head->hash.fdir.hash =
1515 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1516 & IXGBE_ATR_HASH_MASK;
1517 head->hash.fdir.id =
1518 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1523 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1525 * @rx_queue Rx queue handle
1526 * @rx_pkts table of received packets
1527 * @nb_pkts size of rx_pkts table
1528 * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1530 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1531 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1533 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1534 * 1) When non-EOP RSC completion arrives:
1535 * a) Update the HEAD of the current RSC aggregation cluster with the new
1536 * segment's data length.
1537 * b) Set the "next" pointer of the current segment to point to the segment
1538 * at the NEXTP index.
1539 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1540 * in the sw_rsc_ring.
1541 * 2) When EOP arrives we just update the cluster's total length and offload
1542 * flags and deliver the cluster up to the upper layers. In our case - put it
1543 * in the rx_pkts table.
1545 * Returns the number of received packets/clusters (according to the "bulk
1546 * receive" interface).
1548 static inline uint16_t
1549 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1552 struct ixgbe_rx_queue *rxq = rx_queue;
1553 volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1554 struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1555 struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1556 uint16_t rx_id = rxq->rx_tail;
1558 uint16_t nb_hold = rxq->nb_rx_hold;
1559 uint16_t prev_id = rxq->rx_tail;
1561 while (nb_rx < nb_pkts) {
1563 struct ixgbe_rx_entry *rxe;
1564 struct ixgbe_scattered_rx_entry *sc_entry;
1565 struct ixgbe_scattered_rx_entry *next_sc_entry;
1566 struct ixgbe_rx_entry *next_rxe;
1567 struct rte_mbuf *first_seg;
1568 struct rte_mbuf *rxm;
1569 struct rte_mbuf *nmb;
1570 union ixgbe_adv_rx_desc rxd;
1573 volatile union ixgbe_adv_rx_desc *rxdp;
1578 * The code in this whole file uses the volatile pointer to
1579 * ensure the read ordering of the status and the rest of the
1580 * descriptor fields (on the compiler level only!!!). This is so
1581 * UGLY - why not just use the compiler barrier instead? DPDK
1582 * even has the rte_compiler_barrier() for that.
1584 * But most importantly this is just wrong because this doesn't
1585 * ensure memory ordering in a general case at all. For
1586 * instance, DPDK is supposed to work on Power CPUs where
1587 * compiler barrier may just not be enough!
1589 * I tried to write only this function properly to have a
1590 * starting point (as a part of an LRO/RSC series) but the
1591 * compiler cursed at me when I tried to cast away the
1592 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1593 * keeping it the way it is for now.
1595 * The code in this file is broken in so many other places and
1596 * will just not work on a big endian CPU anyway therefore the
1597 * lines below will have to be revisited together with the rest
1601 * - Get rid of "volatile" crap and let the compiler do its
1603 * - Use the proper memory barrier (rte_rmb()) to ensure the
1604 * memory ordering below.
1606 rxdp = &rx_ring[rx_id];
1607 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1609 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1614 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1615 "staterr=0x%x data_len=%u",
1616 rxq->port_id, rxq->queue_id, rx_id, staterr,
1617 rte_le_to_cpu_16(rxd.wb.upper.length));
1620 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1622 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1623 "port_id=%u queue_id=%u",
1624 rxq->port_id, rxq->queue_id);
1626 rte_eth_devices[rxq->port_id].data->
1627 rx_mbuf_alloc_failed++;
1631 else if (nb_hold > rxq->rx_free_thresh) {
1632 uint16_t next_rdt = rxq->rx_free_trigger;
1634 if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1636 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1638 nb_hold -= rxq->rx_free_thresh;
1640 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1641 "port_id=%u queue_id=%u",
1642 rxq->port_id, rxq->queue_id);
1644 rte_eth_devices[rxq->port_id].data->
1645 rx_mbuf_alloc_failed++;
1651 rxe = &sw_ring[rx_id];
1652 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1654 next_id = rx_id + 1;
1655 if (next_id == rxq->nb_rx_desc)
1658 /* Prefetch next mbuf while processing current one. */
1659 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1662 * When next RX descriptor is on a cache-line boundary,
1663 * prefetch the next 4 RX descriptors and the next 4 pointers
1666 if ((next_id & 0x3) == 0) {
1667 rte_ixgbe_prefetch(&rx_ring[next_id]);
1668 rte_ixgbe_prefetch(&sw_ring[next_id]);
1675 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1677 * Update RX descriptor with the physical address of the
1678 * new data buffer of the new allocated mbuf.
1682 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1683 rxdp->read.hdr_addr = 0;
1684 rxdp->read.pkt_addr = dma;
1689 * Set data length & data buffer address of mbuf.
1691 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1692 rxm->data_len = data_len;
1697 * Get next descriptor index:
1698 * - For RSC it's in the NEXTP field.
1699 * - For a scattered packet - it's just a following
1702 if (ixgbe_rsc_count(&rxd))
1704 (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1705 IXGBE_RXDADV_NEXTP_SHIFT;
1709 next_sc_entry = &sw_sc_ring[nextp_id];
1710 next_rxe = &sw_ring[nextp_id];
1711 rte_ixgbe_prefetch(next_rxe);
1714 sc_entry = &sw_sc_ring[rx_id];
1715 first_seg = sc_entry->fbuf;
1716 sc_entry->fbuf = NULL;
1719 * If this is the first buffer of the received packet,
1720 * set the pointer to the first mbuf of the packet and
1721 * initialize its context.
1722 * Otherwise, update the total length and the number of segments
1723 * of the current scattered packet, and update the pointer to
1724 * the last mbuf of the current packet.
1726 if (first_seg == NULL) {
1728 first_seg->pkt_len = data_len;
1729 first_seg->nb_segs = 1;
1731 first_seg->pkt_len += data_len;
1732 first_seg->nb_segs++;
1739 * If this is not the last buffer of the received packet, update
1740 * the pointer to the first mbuf at the NEXTP entry in the
1741 * sw_sc_ring and continue to parse the RX ring.
1744 rxm->next = next_rxe->mbuf;
1745 next_sc_entry->fbuf = first_seg;
1750 * This is the last buffer of the received packet - return
1751 * the current cluster to the user.
1755 /* Initialize the first mbuf of the returned packet */
1756 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1760 * Deal with the case when HW CRC strip is disabled.
1761 * That can't happen when LRO is enabled, but still could
1762 * happen for scattered RX mode.
1764 first_seg->pkt_len -= rxq->crc_len;
1765 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1766 struct rte_mbuf *lp;
1768 for (lp = first_seg; lp->next != rxm; lp = lp->next)
1771 first_seg->nb_segs--;
1772 lp->data_len -= rxq->crc_len - rxm->data_len;
1774 rte_pktmbuf_free_seg(rxm);
1776 rxm->data_len -= rxq->crc_len;
1778 /* Prefetch data of first segment, if configured to do so. */
1779 rte_packet_prefetch((char *)first_seg->buf_addr +
1780 first_seg->data_off);
1783 * Store the mbuf address into the next entry of the array
1784 * of returned packets.
1786 rx_pkts[nb_rx++] = first_seg;
1790 * Record index of the next RX descriptor to probe.
1792 rxq->rx_tail = rx_id;
1795 * If the number of free RX descriptors is greater than the RX free
1796 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1798 * Update the RDT with the value of the last processed RX descriptor
1799 * minus 1, to guarantee that the RDT register is never equal to the
1800 * RDH register, which creates a "full" ring situation from the
1801 * hardware point of view...
1803 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1804 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805 "nb_hold=%u nb_rx=%u",
1806 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1809 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1813 rxq->nb_rx_hold = nb_hold;
1818 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1821 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1825 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1828 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1831 /*********************************************************************
1833 * Queue management functions
1835 **********************************************************************/
1837 static void __attribute__((cold))
1838 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1842 if (txq->sw_ring != NULL) {
1843 for (i = 0; i < txq->nb_tx_desc; i++) {
1844 if (txq->sw_ring[i].mbuf != NULL) {
1845 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1846 txq->sw_ring[i].mbuf = NULL;
1852 static void __attribute__((cold))
1853 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1856 txq->sw_ring != NULL)
1857 rte_free(txq->sw_ring);
1860 static void __attribute__((cold))
1861 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1863 if (txq != NULL && txq->ops != NULL) {
1864 txq->ops->release_mbufs(txq);
1865 txq->ops->free_swring(txq);
1870 void __attribute__((cold))
1871 ixgbe_dev_tx_queue_release(void *txq)
1873 ixgbe_tx_queue_release(txq);
1876 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1877 static void __attribute__((cold))
1878 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1880 static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1881 struct ixgbe_tx_entry *txe = txq->sw_ring;
1884 /* Zero out HW ring memory */
1885 for (i = 0; i < txq->nb_tx_desc; i++) {
1886 txq->tx_ring[i] = zeroed_desc;
1889 /* Initialize SW ring entries */
1890 prev = (uint16_t) (txq->nb_tx_desc - 1);
1891 for (i = 0; i < txq->nb_tx_desc; i++) {
1892 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1893 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1896 txe[prev].next_id = i;
1900 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1901 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1904 txq->nb_tx_used = 0;
1906 * Always allow 1 descriptor to be un-allocated to avoid
1907 * a H/W race condition
1909 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1910 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1912 memset((void*)&txq->ctx_cache, 0,
1913 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1916 static const struct ixgbe_txq_ops def_txq_ops = {
1917 .release_mbufs = ixgbe_tx_queue_release_mbufs,
1918 .free_swring = ixgbe_tx_free_swring,
1919 .reset = ixgbe_reset_tx_queue,
1922 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1923 * the queue parameters. Used in tx_queue_setup by primary process and then
1924 * in dev_init by secondary process when attaching to an existing ethdev.
1926 void __attribute__((cold))
1927 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1929 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1930 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1931 && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1932 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1933 #ifdef RTE_IXGBE_INC_VECTOR
1934 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1935 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1936 ixgbe_txq_vec_setup(txq) == 0)) {
1937 PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1938 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1941 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1943 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1945 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1946 (unsigned long)txq->txq_flags,
1947 (unsigned long)IXGBE_SIMPLE_FLAGS);
1949 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1950 (unsigned long)txq->tx_rs_thresh,
1951 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1952 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1956 int __attribute__((cold))
1957 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1960 unsigned int socket_id,
1961 const struct rte_eth_txconf *tx_conf)
1963 const struct rte_memzone *tz;
1964 struct ixgbe_tx_queue *txq;
1965 struct ixgbe_hw *hw;
1966 uint16_t tx_rs_thresh, tx_free_thresh;
1968 PMD_INIT_FUNC_TRACE();
1969 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1972 * Validate number of transmit descriptors.
1973 * It must not exceed hardware maximum, and must be multiple
1976 if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
1977 (nb_desc > IXGBE_MAX_RING_DESC) ||
1978 (nb_desc < IXGBE_MIN_RING_DESC)) {
1983 * The following two parameters control the setting of the RS bit on
1984 * transmit descriptors.
1985 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1986 * descriptors have been used.
1987 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1988 * descriptors are used or if the number of descriptors required
1989 * to transmit a packet is greater than the number of free TX
1991 * The following constraints must be satisfied:
1992 * tx_rs_thresh must be greater than 0.
1993 * tx_rs_thresh must be less than the size of the ring minus 2.
1994 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1995 * tx_rs_thresh must be a divisor of the ring size.
1996 * tx_free_thresh must be greater than 0.
1997 * tx_free_thresh must be less than the size of the ring minus 3.
1998 * One descriptor in the TX ring is used as a sentinel to avoid a
1999 * H/W race condition, hence the maximum threshold constraints.
2000 * When set to zero use default values.
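*
* Example (illustrative values): with nb_desc = 512, the defaults
* tx_rs_thresh = 32 and tx_free_thresh = 32 satisfy all of the above:
* 0 < 32 <= 32 < 509, and 512 % 32 == 0.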
2002 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2003 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2004 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2005 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2006 if (tx_rs_thresh >= (nb_desc - 2)) {
2007 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2008 "of TX descriptors minus 2. (tx_rs_thresh=%u "
2009 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2010 (int)dev->data->port_id, (int)queue_idx);
2013 if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2014 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2015 "(tx_rs_thresh=%u port=%d queue=%d)",
2016 DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2017 (int)dev->data->port_id, (int)queue_idx);
2020 if (tx_free_thresh >= (nb_desc - 3)) {
2021 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2022 "number of TX descriptors minus 3. "
2023 "(tx_free_thresh=%u "
2024 "port=%d queue=%d)",
2025 (unsigned int)tx_free_thresh,
2026 (int)dev->data->port_id, (int)queue_idx);
2029 if (tx_rs_thresh > tx_free_thresh) {
2030 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2031 "tx_free_thresh. (tx_free_thresh=%u "
2032 "tx_rs_thresh=%u port=%d queue=%d)",
2033 (unsigned int)tx_free_thresh,
2034 (unsigned int)tx_rs_thresh,
2035 (int)dev->data->port_id,
2039 if ((nb_desc % tx_rs_thresh) != 0) {
2040 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2041 "number of TX descriptors. (tx_rs_thresh=%u "
2042 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2043 (int)dev->data->port_id, (int)queue_idx);
2048 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2049 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2050 * by the NIC and all descriptors are written back after the NIC
2051 * accumulates WTHRESH descriptors.
2053 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2054 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2055 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2056 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2057 (int)dev->data->port_id, (int)queue_idx);
2061 /* Free memory prior to re-allocation if needed... */
2062 if (dev->data->tx_queues[queue_idx] != NULL) {
2063 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2064 dev->data->tx_queues[queue_idx] = NULL;
2067 /* First allocate the tx queue data structure */
2068 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2069 RTE_CACHE_LINE_SIZE, socket_id);
2074 * Allocate TX ring hardware descriptors. A memzone large enough to
2075 * handle the maximum ring size is allocated in order to allow for
2076 * resizing in later calls to the queue setup function.
2078 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2079 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2080 IXGBE_ALIGN, socket_id);
2082 ixgbe_tx_queue_release(txq);
2086 txq->nb_tx_desc = nb_desc;
2087 txq->tx_rs_thresh = tx_rs_thresh;
2088 txq->tx_free_thresh = tx_free_thresh;
2089 txq->pthresh = tx_conf->tx_thresh.pthresh;
2090 txq->hthresh = tx_conf->tx_thresh.hthresh;
2091 txq->wthresh = tx_conf->tx_thresh.wthresh;
2092 txq->queue_id = queue_idx;
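/* With SR-IOV active the PF queues belong to the default pool, so the
 * HW ring index is offset by that pool's first queue (def_pool_q_idx). */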
2093 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2094 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2095 txq->port_id = dev->data->port_id;
2096 txq->txq_flags = tx_conf->txq_flags;
2097 txq->ops = &def_txq_ops;
2098 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2101 * Use VFTDT instead of TDT when the device is a virtual function.
2103 if (hw->mac.type == ixgbe_mac_82599_vf ||
2104 hw->mac.type == ixgbe_mac_X540_vf ||
2105 hw->mac.type == ixgbe_mac_X550_vf ||
2106 hw->mac.type == ixgbe_mac_X550EM_x_vf)
2107 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2109 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2111 txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2112 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2114 /* Allocate software ring */
2115 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2116 sizeof(struct ixgbe_tx_entry) * nb_desc,
2117 RTE_CACHE_LINE_SIZE, socket_id);
2118 if (txq->sw_ring == NULL) {
2119 ixgbe_tx_queue_release(txq);
2122 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2123 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2125 /* set up vector or scalar TX function as appropriate */
2126 ixgbe_set_tx_function(dev, txq);
2128 txq->ops->reset(txq);
2130 dev->data->tx_queues[queue_idx] = txq;
2137 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2139 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2140 * in the sw_rsc_ring is not set to NULL but rather points to the next
2141 * mbuf of this RSC aggregation (that has not been completed yet and still
2142 * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2143 * just free the first "nb_segs" segments of the cluster explicitly by calling
2144 * rte_pktmbuf_free_seg() on each of them.
2146 * @m scattered cluster head
2148 static void __attribute__((cold))
2149 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2151 uint8_t i, nb_segs = m->nb_segs;
2152 struct rte_mbuf *next_seg;
2154 for (i = 0; i < nb_segs; i++) {
2156 rte_pktmbuf_free_seg(m);
2161 static void __attribute__((cold))
2162 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2166 #ifdef RTE_IXGBE_INC_VECTOR
2167 /* SSE Vector driver has a different way of releasing mbufs. */
2168 if (rxq->rx_using_sse) {
2169 ixgbe_rx_queue_release_mbufs_vec(rxq);
2174 if (rxq->sw_ring != NULL) {
2175 for (i = 0; i < rxq->nb_rx_desc; i++) {
2176 if (rxq->sw_ring[i].mbuf != NULL) {
2177 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2178 rxq->sw_ring[i].mbuf = NULL;
2181 if (rxq->rx_nb_avail) {
2182 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2183 struct rte_mbuf *mb;
2184 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2185 rte_pktmbuf_free_seg(mb);
2187 rxq->rx_nb_avail = 0;
2191 if (rxq->sw_sc_ring)
2192 for (i = 0; i < rxq->nb_rx_desc; i++)
2193 if (rxq->sw_sc_ring[i].fbuf) {
2194 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2195 rxq->sw_sc_ring[i].fbuf = NULL;
2199 static void __attribute__((cold))
2200 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2203 ixgbe_rx_queue_release_mbufs(rxq);
2204 rte_free(rxq->sw_ring);
2205 rte_free(rxq->sw_sc_ring);
2210 void __attribute__((cold))
2211 ixgbe_dev_rx_queue_release(void *rxq)
2213 ixgbe_rx_queue_release(rxq);
2217 * Check if Rx Burst Bulk Alloc function can be used.
2219 * 0: the preconditions are satisfied and the bulk allocation function
2221 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2222 * function must be used.
2224 static inline int __attribute__((cold))
2225 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2230 * Make sure the following pre-conditions are satisfied:
2231 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2232 * rxq->rx_free_thresh < rxq->nb_rx_desc
2233 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2234 * rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
2235 * Scattered packets are not supported. This should be checked
2236 * outside of this function.
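 * For example, nb_rx_desc = 128 with rx_free_thresh = 32 satisfies all
 * of the above, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32.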
2238 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2239 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2240 "rxq->rx_free_thresh=%d, "
2241 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2242 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2244 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2245 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2246 "rxq->rx_free_thresh=%d, "
2247 "rxq->nb_rx_desc=%d",
2248 rxq->rx_free_thresh, rxq->nb_rx_desc);
2250 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2251 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2252 "rxq->nb_rx_desc=%d, "
2253 "rxq->rx_free_thresh=%d",
2254 rxq->nb_rx_desc, rxq->rx_free_thresh);
2256 } else if (!(rxq->nb_rx_desc <
2257 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2258 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2259 "rxq->nb_rx_desc=%d, "
2260 "IXGBE_MAX_RING_DESC=%d, "
2261 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2262 rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2263 RTE_PMD_IXGBE_RX_MAX_BURST);
2270 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2271 static void __attribute__((cold))
2272 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2274 static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2276 uint16_t len = rxq->nb_rx_desc;
2279 * By default, the Rx queue setup function allocates enough memory for
2280 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2281 * extra memory at the end of the descriptor ring to be zero'd out. A
2282 * pre-condition for using the Rx burst bulk alloc function is that the
2283 * number of descriptors is less than or equal to
2284 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2285 * constraints here to see if we need to zero out memory after the end
2286 * of the H/W descriptor ring.
2288 if (adapter->rx_bulk_alloc_allowed)
2289 /* zero out extra memory */
2290 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2293 * Zero out HW ring memory. Zero out extra memory at the end of
2294 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2295 * reads extra memory as zeros.
2297 for (i = 0; i < len; i++) {
2298 rxq->rx_ring[i] = zeroed_desc;
2302 * initialize extra software ring entries. Space for these extra
2303 * entries is always allocated
2305 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2306 for (i = rxq->nb_rx_desc; i < len; ++i) {
2307 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2310 rxq->rx_nb_avail = 0;
2311 rxq->rx_next_avail = 0;
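/* rx_free_trigger is the index of the last descriptor in the current
 * bulk-allocation window; once rx_tail passes it, the bulk-alloc receive
 * path replenishes rx_free_thresh software ring entries. */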
2312 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2314 rxq->nb_rx_hold = 0;
2315 rxq->pkt_first_seg = NULL;
2316 rxq->pkt_last_seg = NULL;
2318 #ifdef RTE_IXGBE_INC_VECTOR
2319 rxq->rxrearm_start = 0;
2320 rxq->rxrearm_nb = 0;
2324 int __attribute__((cold))
2325 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2328 unsigned int socket_id,
2329 const struct rte_eth_rxconf *rx_conf,
2330 struct rte_mempool *mp)
2332 const struct rte_memzone *rz;
2333 struct ixgbe_rx_queue *rxq;
2334 struct ixgbe_hw *hw;
2336 struct ixgbe_adapter *adapter =
2337 (struct ixgbe_adapter *)dev->data->dev_private;
2339 PMD_INIT_FUNC_TRACE();
2340 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2343 * Validate number of receive descriptors.
2344 * It must not exceed hardware maximum, and must be multiple
2347 if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2348 (nb_desc > IXGBE_MAX_RING_DESC) ||
2349 (nb_desc < IXGBE_MIN_RING_DESC)) {
2353 /* Free memory prior to re-allocation if needed... */
2354 if (dev->data->rx_queues[queue_idx] != NULL) {
2355 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2356 dev->data->rx_queues[queue_idx] = NULL;
2359 /* First allocate the rx queue data structure */
2360 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2361 RTE_CACHE_LINE_SIZE, socket_id);
2365 rxq->nb_rx_desc = nb_desc;
2366 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2367 rxq->queue_id = queue_idx;
2368 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2369 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2370 rxq->port_id = dev->data->port_id;
2371 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2373 rxq->drop_en = rx_conf->rx_drop_en;
2374 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2377 * Allocate RX ring hardware descriptors. A memzone large enough to
2378 * handle the maximum ring size is allocated in order to allow for
2379 * resizing in later calls to the queue setup function.
2381 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2382 RX_RING_SZ, IXGBE_ALIGN, socket_id);
2384 ixgbe_rx_queue_release(rxq);
2389 * Zero init all the descriptors in the ring.
2391 memset(rz->addr, 0, RX_RING_SZ);
2394 * Use VFRDT/VFRDH instead of RDT/RDH when the device is a virtual function.
2396 if (hw->mac.type == ixgbe_mac_82599_vf ||
2397 hw->mac.type == ixgbe_mac_X540_vf ||
2398 hw->mac.type == ixgbe_mac_X550_vf ||
2399 hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2401 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2403 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2407 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2409 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2412 rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2413 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2416 * Certain constraints must be met in order to use the bulk buffer
2417 * allocation Rx burst function. If any of the Rx queues doesn't meet them,
2418 * the feature should be disabled for the whole port.
2420 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2421 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2422 "preconditions - canceling the feature for "
2423 "the whole port[%d]",
2424 rxq->queue_id, rxq->port_id);
2425 adapter->rx_bulk_alloc_allowed = false;
2429 * Allocate software ring. Allow for space at the end of the
2430 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2431 * function does not access an invalid memory region.
2434 if (adapter->rx_bulk_alloc_allowed)
2435 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2437 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2438 sizeof(struct ixgbe_rx_entry) * len,
2439 RTE_CACHE_LINE_SIZE, socket_id);
2440 if (!rxq->sw_ring) {
2441 ixgbe_rx_queue_release(rxq);
2446 * Always allocate even if it's not going to be needed in order to
2447 * simplify the code.
2449 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2450 * be requested in ixgbe_dev_rx_init(), which is called later from
2454 rte_zmalloc_socket("rxq->sw_sc_ring",
2455 sizeof(struct ixgbe_scattered_rx_entry) * len,
2456 RTE_CACHE_LINE_SIZE, socket_id);
2457 if (!rxq->sw_sc_ring) {
2458 ixgbe_rx_queue_release(rxq);
2462 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2463 "dma_addr=0x%"PRIx64,
2464 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2465 rxq->rx_ring_phys_addr);
2467 if (!rte_is_power_of_2(nb_desc)) {
2468 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2469 "preconditions - canceling the feature for "
2470 "the whole port[%d]",
2471 rxq->queue_id, rxq->port_id);
2472 adapter->rx_vec_allowed = false;
2474 ixgbe_rxq_vec_setup(rxq);
2476 dev->data->rx_queues[queue_idx] = rxq;
2478 ixgbe_reset_rx_queue(adapter, rxq);
2484 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2486 #define IXGBE_RXQ_SCAN_INTERVAL 4
2487 volatile union ixgbe_adv_rx_desc *rxdp;
2488 struct ixgbe_rx_queue *rxq;
2491 if (rx_queue_id >= dev->data->nb_rx_queues) {
2492 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2496 rxq = dev->data->rx_queues[rx_queue_id];
2497 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2499 while ((desc < rxq->nb_rx_desc) &&
2500 (rxdp->wb.upper.status_error &
2501 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2502 desc += IXGBE_RXQ_SCAN_INTERVAL;
2503 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2504 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2505 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2506 desc - rxq->nb_rx_desc]);
2513 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2515 volatile union ixgbe_adv_rx_desc *rxdp;
2516 struct ixgbe_rx_queue *rxq = rx_queue;
2519 if (unlikely(offset >= rxq->nb_rx_desc))
2521 desc = rxq->rx_tail + offset;
2522 if (desc >= rxq->nb_rx_desc)
2523 desc -= rxq->nb_rx_desc;
2525 rxdp = &rxq->rx_ring[desc];
2526 return !!(rxdp->wb.upper.status_error &
2527 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2530 void __attribute__((cold))
2531 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2534 struct ixgbe_adapter *adapter =
2535 (struct ixgbe_adapter *)dev->data->dev_private;
2537 PMD_INIT_FUNC_TRACE();
2539 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2540 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2542 txq->ops->release_mbufs(txq);
2543 txq->ops->reset(txq);
2547 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2548 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2550 ixgbe_rx_queue_release_mbufs(rxq);
2551 ixgbe_reset_rx_queue(adapter, rxq);
2557 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2561 PMD_INIT_FUNC_TRACE();
2563 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2564 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2565 dev->data->rx_queues[i] = NULL;
2567 dev->data->nb_rx_queues = 0;
2569 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2570 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2571 dev->data->tx_queues[i] = NULL;
2573 dev->data->nb_tx_queues = 0;
2576 /*********************************************************************
2578 * Device RX/TX init functions
2580 **********************************************************************/
2583 * Receive Side Scaling (RSS)
2584 * See section 7.1.2.8 in the following document:
2585 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2588 * The source and destination IP addresses of the IP header and the source
2589 * and destination ports of TCP/UDP headers, if any, of received packets are
2590 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2591 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2592 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2593 * RSS output index which is used as the RX queue index where to store the
2595 * The following output is supplied in the RX write-back descriptor:
2596 * - 32-bit result of the Microsoft RSS hash function,
2597 * - 4-bit RSS type field.
2601 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2602 * Used as the default key.
2604 static uint8_t rss_intel_key[40] = {
2605 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2606 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2607 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2608 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2609 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2613 ixgbe_rss_disable(struct rte_eth_dev *dev)
2615 struct ixgbe_hw *hw;
2619 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2620 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2621 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2622 mrqc &= ~IXGBE_MRQC_RSSEN;
2623 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2627 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2637 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2638 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2640 hash_key = rss_conf->rss_key;
2641 if (hash_key != NULL) {
2642 /* Fill in RSS hash key */
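/* The 40-byte key fills ten 32-bit RSSRK registers; byte 0 of the
 * key ends up in the least significant byte of RSSRK[0]. */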
2643 for (i = 0; i < 10; i++) {
2644 rss_key = hash_key[(i * 4)];
2645 rss_key |= hash_key[(i * 4) + 1] << 8;
2646 rss_key |= hash_key[(i * 4) + 2] << 16;
2647 rss_key |= hash_key[(i * 4) + 3] << 24;
2648 IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2652 /* Set configured hashing protocols in MRQC register */
2653 rss_hf = rss_conf->rss_hf;
2654 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2655 if (rss_hf & ETH_RSS_IPV4)
2656 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2657 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2658 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2659 if (rss_hf & ETH_RSS_IPV6)
2660 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2661 if (rss_hf & ETH_RSS_IPV6_EX)
2662 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2663 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2664 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2665 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2666 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2667 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2668 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2669 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2670 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2671 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2672 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2673 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2677 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2678 struct rte_eth_rss_conf *rss_conf)
2680 struct ixgbe_hw *hw;
2685 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2687 if (!ixgbe_rss_update_sp(hw->mac.type)) {
2688 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2692 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2695 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2696 * "RSS enabling cannot be done dynamically while it must be
2697 * preceded by a software reset"
2698 * Before changing anything, first check that the update RSS operation
2699 * does not attempt to disable RSS, if RSS was enabled at
2700 * initialization time, or does not attempt to enable RSS, if RSS was
2701 * disabled at initialization time.
2703 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2704 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2705 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2706 if (rss_hf != 0) /* Enable RSS */
2708 return 0; /* Nothing to do */
2711 if (rss_hf == 0) /* Disable RSS */
2713 ixgbe_hw_rss_hash_set(hw, rss_conf);
2718 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2719 struct rte_eth_rss_conf *rss_conf)
2721 struct ixgbe_hw *hw;
2730 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2731 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2732 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2733 hash_key = rss_conf->rss_key;
2734 if (hash_key != NULL) {
2735 /* Return RSS hash key */
2736 for (i = 0; i < 10; i++) {
2737 rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2738 hash_key[(i * 4)] = rss_key & 0x000000FF;
2739 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2740 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2741 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2745 /* Get RSS functions configured in MRQC register */
2746 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2747 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2748 rss_conf->rss_hf = 0;
2752 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2753 rss_hf |= ETH_RSS_IPV4;
2754 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2755 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2756 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2757 rss_hf |= ETH_RSS_IPV6;
2758 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2759 rss_hf |= ETH_RSS_IPV6_EX;
2760 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2761 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2762 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2763 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2764 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2765 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2766 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2767 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2768 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2769 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2770 rss_conf->rss_hf = rss_hf;
2775 ixgbe_rss_configure(struct rte_eth_dev *dev)
2777 struct rte_eth_rss_conf rss_conf;
2778 struct ixgbe_hw *hw;
2782 uint16_t sp_reta_size;
2785 PMD_INIT_FUNC_TRACE();
2786 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2788 sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2791 * Fill in redirection table
2792 * The byte-swap is needed because NIC registers are in
2793 * little-endian order.
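 * Entries are assigned round-robin over the enabled RX queues and packed
 * four one-byte entries per 32-bit RETA register; e.g. with four RX
 * queues the 128-entry table repeats the pattern 0, 1, 2, 3.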
2796 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2797 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2799 if (j == dev->data->nb_rx_queues)
2801 reta = (reta << 8) | j;
2803 IXGBE_WRITE_REG(hw, reta_reg,
2808 * Configure the RSS key and the RSS protocols used to compute
2809 * the RSS hash of input packets.
2811 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2812 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2813 ixgbe_rss_disable(dev);
2816 if (rss_conf.rss_key == NULL)
2817 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2818 ixgbe_hw_rss_hash_set(hw, &rss_conf);
2821 #define NUM_VFTA_REGISTERS 128
2822 #define NIC_RX_BUFFER_SIZE 0x200
2823 #define X550_RX_BUFFER_SIZE 0x180
2826 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2828 struct rte_eth_vmdq_dcb_conf *cfg;
2829 struct ixgbe_hw *hw;
2830 enum rte_eth_nb_pools num_pools;
2831 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2833 uint8_t nb_tcs; /* number of traffic classes */
2836 PMD_INIT_FUNC_TRACE();
2837 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2838 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2839 num_pools = cfg->nb_queue_pools;
2840 /* Check we have a valid number of pools */
2841 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2842 ixgbe_rss_disable(dev);
2845 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2846 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2850 * split rx buffer up into sections, each for 1 traffic class
2852 switch (hw->mac.type) {
2853 case ixgbe_mac_X550:
2854 case ixgbe_mac_X550EM_x:
2855 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2858 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2861 for (i = 0 ; i < nb_tcs; i++) {
2862 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2863 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2864 /* clear 10 bits. */
2865 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2866 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2868 /* zero alloc all unused TCs */
2869 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2870 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2871 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2872 /* clear 10 bits. */
2873 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2876 /* MRQC: enable vmdq and dcb */
2877 mrqc = ((num_pools == ETH_16_POOLS) ? \
2878 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2879 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2881 /* PFVTCTL: turn on virtualisation and set the default pool */
2882 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2883 if (cfg->enable_default_pool) {
2884 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2886 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2889 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2891 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
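/*
 * For example, dcb_tc = {0, 1, 2, 3, 4, 5, 6, 7} maps UP n to TC n and
 * packs into RTRUP2TC = 0xFAC688 (3-bit fields, UP 0 in the LSBs).
 */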
2893 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2895 * mapping is done with 3 bits per priority,
2896 * so shift by i*3 each time
2898 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2900 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2902 /* RTRPCS: DCB related */
2903 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2905 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2906 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2907 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2908 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2910 /* VFTA - enable all vlan filters */
2911 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2912 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2915 /* VFRE: pool enabling for receive - 16 or 32 */
2916 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2917 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2920 * MPSAR - allow pools to read specific mac addresses
2921 * In this case, all pools should be able to read from mac addr 0
2923 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2924 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2926 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2927 for (i = 0; i < cfg->nb_pool_maps; i++) {
2928 /* set vlan id in VF register and set the valid bit */
2929 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2930 (cfg->pool_map[i].vlan_id & 0xFFF)));
2932 * Put the allowed pools in VFB reg. As we only have 16 or 32
2933 * pools, we only need to use the first half of the register
2936 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2941 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2942 * @hw: pointer to hardware structure
2943 * @dcb_config: pointer to ixgbe_dcb_config structure
2946 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2947 struct ixgbe_dcb_config *dcb_config)
2952 PMD_INIT_FUNC_TRACE();
2953 if (hw->mac.type != ixgbe_mac_82598EB) {
2954 /* Disable the Tx desc arbiter so that MTQC can be changed */
2955 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2956 reg |= IXGBE_RTTDCS_ARBDIS;
2957 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2959 /* Enable DCB for Tx with 8 TCs */
2960 if (dcb_config->num_tcs.pg_tcs == 8) {
2961 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2964 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2966 if (dcb_config->vt_mode)
2967 reg |= IXGBE_MTQC_VT_ENA;
2968 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2970 /* Disable drop for all queues */
2971 for (q = 0; q < 128; q++)
2972 IXGBE_WRITE_REG(hw, IXGBE_QDE,
2973 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2975 /* Enable the Tx desc arbiter */
2976 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2977 reg &= ~IXGBE_RTTDCS_ARBDIS;
2978 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2980 /* Enable Security TX Buffer IFG for DCB */
2981 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2982 reg |= IXGBE_SECTX_DCB;
2983 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2989 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2990 * @dev: pointer to rte_eth_dev structure
2991 * @dcb_config: pointer to ixgbe_dcb_config structure
2994 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2995 struct ixgbe_dcb_config *dcb_config)
2997 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2998 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2999 struct ixgbe_hw *hw =
3000 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3002 PMD_INIT_FUNC_TRACE();
3003 if (hw->mac.type != ixgbe_mac_82598EB)
3004 /*PF VF Transmit Enable*/
3005 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3006 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3008 /*Configure general DCB TX parameters*/
3009 ixgbe_dcb_tx_hw_config(hw,dcb_config);
3014 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3015 struct ixgbe_dcb_config *dcb_config)
3017 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3018 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3019 struct ixgbe_dcb_tc_config *tc;
3022 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3023 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3024 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3025 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3028 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3029 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3031 /* User Priority to Traffic Class mapping */
3032 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3033 j = vmdq_rx_conf->dcb_tc[i];
3034 tc = &dcb_config->tc_config[j];
3035 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3041 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3042 struct ixgbe_dcb_config *dcb_config)
3044 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3045 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3046 struct ixgbe_dcb_tc_config *tc;
3049 /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3050 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3051 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3052 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3055 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3056 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3059 /* User Priority to Traffic Class mapping */
3060 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3061 j = vmdq_tx_conf->dcb_tc[i];
3062 tc = &dcb_config->tc_config[j];
3063 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3070 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3071 struct ixgbe_dcb_config *dcb_config)
3073 struct rte_eth_dcb_rx_conf *rx_conf =
3074 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3075 struct ixgbe_dcb_tc_config *tc;
3078 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3079 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3081 /* User Priority to Traffic Class mapping */
3082 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3083 j = rx_conf->dcb_tc[i];
3084 tc = &dcb_config->tc_config[j];
3085 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3091 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3092 struct ixgbe_dcb_config *dcb_config)
3094 struct rte_eth_dcb_tx_conf *tx_conf =
3095 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3096 struct ixgbe_dcb_tc_config *tc;
3099 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3100 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3102 /* User Priority to Traffic Class mapping */
3103 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3104 j = tx_conf->dcb_tc[i];
3105 tc = &dcb_config->tc_config[j];
3106 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3112 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3113 * @hw: pointer to hardware structure
3114 * @dcb_config: pointer to ixgbe_dcb_config structure
3117 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3118 struct ixgbe_dcb_config *dcb_config)
3124 PMD_INIT_FUNC_TRACE();
3126 * Disable the arbiter before changing parameters
3127 * (always enable recycle mode; WSP)
3129 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3130 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3132 if (hw->mac.type != ixgbe_mac_82598EB) {
3133 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3134 if (dcb_config->num_tcs.pg_tcs == 4) {
3135 if (dcb_config->vt_mode)
3136 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3137 IXGBE_MRQC_VMDQRT4TCEN;
3139 /* whether the mode is DCB or DCB_RSS, just
3140 * set MRQE to RSSXTCEN. RSS is controlled
3143 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3144 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3145 IXGBE_MRQC_RTRSS4TCEN;
3148 if (dcb_config->num_tcs.pg_tcs == 8) {
3149 if (dcb_config->vt_mode)
3150 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3151 IXGBE_MRQC_VMDQRT8TCEN;
3153 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3154 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3155 IXGBE_MRQC_RTRSS8TCEN;
3159 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3162 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3163 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3164 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3165 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3167 /* VFTA - enable all vlan filters */
3168 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3169 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3173 * Configure Rx packet plane (recycle mode; WSP) and
3176 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3177 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3183 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3184 uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3186 switch (hw->mac.type) {
3187 case ixgbe_mac_82598EB:
3188 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3190 case ixgbe_mac_82599EB:
3191 case ixgbe_mac_X540:
3192 case ixgbe_mac_X550:
3193 case ixgbe_mac_X550EM_x:
3194 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3203 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3204 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3206 switch (hw->mac.type) {
3207 case ixgbe_mac_82598EB:
3208 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3209 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3211 case ixgbe_mac_82599EB:
3212 case ixgbe_mac_X540:
3213 case ixgbe_mac_X550:
3214 case ixgbe_mac_X550EM_x:
3215 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3216 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3223 #define DCB_RX_CONFIG 1
3224 #define DCB_TX_CONFIG 1
3225 #define DCB_TX_PB 1024
3227 * ixgbe_dcb_hw_configure - Enable DCB and configure
3228 * general DCB parameters in both VT and non-VT mode
3229 * @dev: pointer to rte_eth_dev structure
3230 * @dcb_config: pointer to ixgbe_dcb_config structure
3233 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3234 struct ixgbe_dcb_config *dcb_config)
3237 uint8_t i, pfc_en, nb_tcs;
3238 uint16_t pbsize, rx_buffer_size;
3239 uint8_t config_dcb_rx = 0;
3240 uint8_t config_dcb_tx = 0;
3241 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3242 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3243 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3244 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3245 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3246 struct ixgbe_dcb_tc_config *tc;
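/* e.g. the default 1500-byte MTU gives max_frame = 1518
 * (14-byte Ethernet header + 4-byte CRC). */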
3247 uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3248 struct ixgbe_hw *hw =
3249 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3251 switch (dev->data->dev_conf.rxmode.mq_mode) {
3252 case ETH_MQ_RX_VMDQ_DCB:
3253 dcb_config->vt_mode = true;
3254 if (hw->mac.type != ixgbe_mac_82598EB) {
3255 config_dcb_rx = DCB_RX_CONFIG;
3257 * get DCB and VT RX configuration parameters
3260 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3261 /*Configure general VMDQ and DCB RX parameters*/
3262 ixgbe_vmdq_dcb_configure(dev);
3266 case ETH_MQ_RX_DCB_RSS:
3267 dcb_config->vt_mode = false;
3268 config_dcb_rx = DCB_RX_CONFIG;
3269 /* Get DCB RX configuration parameters from rte_eth_conf */
3270 ixgbe_dcb_rx_config(dev, dcb_config);
3271 /*Configure general DCB RX parameters*/
3272 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3275 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3278 switch (dev->data->dev_conf.txmode.mq_mode) {
3279 case ETH_MQ_TX_VMDQ_DCB:
3280 dcb_config->vt_mode = true;
3281 config_dcb_tx = DCB_TX_CONFIG;
3282 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3283 ixgbe_dcb_vt_tx_config(dev,dcb_config);
3284 /*Configure general VMDQ and DCB TX parameters*/
3285 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
3289 dcb_config->vt_mode = false;
3290 config_dcb_tx = DCB_TX_CONFIG;
3291 /*get DCB TX configuration parameters from rte_eth_conf*/
3292 ixgbe_dcb_tx_config(dev, dcb_config);
3293 /*Configure general DCB TX parameters*/
3294 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3297 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3301 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3303 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3304 if (nb_tcs == ETH_4_TCS) {
3305 /* Avoid un-configured priority mapping to TC0 */
3307 uint8_t mask = 0xFF;
3308 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3309 mask = (uint8_t)(mask & (~ (1 << map[i])));
3310 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3311 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3315 /* Re-configure 4 TCs BW */
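/* e.g. with nb_tcs == 4 each active TC gets 100 / 4 = 25 percent of
 * the bandwidth group; the remaining TCs are zeroed just below. */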
3316 for (i = 0; i < nb_tcs; i++) {
3317 tc = &dcb_config->tc_config[i];
3318 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3319 (uint8_t)(100 / nb_tcs);
3320 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3321 (uint8_t)(100 / nb_tcs);
3323 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3324 tc = &dcb_config->tc_config[i];
3325 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3326 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3330 switch (hw->mac.type) {
3331 case ixgbe_mac_X550:
3332 case ixgbe_mac_X550EM_x:
3333 rx_buffer_size = X550_RX_BUFFER_SIZE;
3336 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3341 /* Set RX buffer size */
3342 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3343 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3344 for (i = 0 ; i < nb_tcs; i++) {
3345 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3347 /* zero alloc all unused TCs */
3348 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3349 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3353 /* Only support an equally distributed Tx packet buffer strategy. */
3354 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3355 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3356 for (i = 0; i < nb_tcs; i++) {
3357 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3358 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3360 /* Clear unused TCs, if any, to zero buffer size*/
3361 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3362 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3363 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3367 /*Calculates traffic class credits*/
3368 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3369 IXGBE_DCB_TX_CONFIG);
3370 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3371 IXGBE_DCB_RX_CONFIG);
3374 /* Unpack CEE standard containers */
3375 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3376 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3377 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3378 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3379 /* Configure PG(ETS) RX */
3380 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3384 /* Unpack CEE standard containers */
3385 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3386 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3387 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3388 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3389 /* Configure PG(ETS) TX */
3390 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3393 /*Configure queue statistics registers*/
3394 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3396 /* Check if the PFC is supported */
3397 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3398 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3399 for (i = 0; i < nb_tcs; i++) {
3401 * If the TC count is 8, the default high_water is 48
3402 * and the default low_water is 16.
3404 hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3405 hw->fc.low_water[i] = pbsize / 4;
3406 /* Enable pfc for this TC */
3407 tc = &dcb_config->tc_config[i];
3408 tc->pfc = ixgbe_dcb_pfc_enabled;
3410 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3411 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3413 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3420 * ixgbe_configure_dcb - Configure DCB Hardware
3421 * @dev: pointer to rte_eth_dev
3423 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3425 struct ixgbe_dcb_config *dcb_cfg =
3426 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3427 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3429 PMD_INIT_FUNC_TRACE();
3431 /* check that the mq_mode is supported for DCB */
3432 if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3433 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3434 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3437 if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3440 /** Configure DCB hardware **/
3441 ixgbe_dcb_hw_configure(dev, dcb_cfg);
3447 * VMDq is only supported on 10 GbE NICs.
3450 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3452 struct rte_eth_vmdq_rx_conf *cfg;
3453 struct ixgbe_hw *hw;
3454 enum rte_eth_nb_pools num_pools;
3455 uint32_t mrqc, vt_ctl, vlanctrl;
3459 PMD_INIT_FUNC_TRACE();
3460 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3461 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3462 num_pools = cfg->nb_queue_pools;
3464 ixgbe_rss_disable(dev);
3466 /* MRQC: enable vmdq */
3467 mrqc = IXGBE_MRQC_VMDQEN;
3468 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3470 /* PFVTCTL: turn on virtualisation and set the default pool */
3471 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3472 if (cfg->enable_default_pool)
3473 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3475 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3477 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3479 for (i = 0; i < (int)num_pools; i++) {
3480 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3481 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3484 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3485 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3486 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3487 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3489 /* VFTA - enable all vlan filters */
3490 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3491 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3493 /* VFRE: pool enabling for receive - 64 */
3494 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3495 if (num_pools == ETH_64_POOLS)
3496 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3499 * MPSAR - allow pools to read specific mac addresses
3500 * In this case, all pools should be able to read from mac addr 0
3502 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3503 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3505 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3506 for (i = 0; i < cfg->nb_pool_maps; i++) {
3507 /* set vlan id in VF register and set the valid bit */
3508 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3509 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3511 * Put the allowed pools in VFB reg. As we only have 16 or 64
3512 * pools, we only need to use the first half of the register
3515 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3516 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3517 (cfg->pool_map[i].pools & UINT32_MAX));
3519 IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3520 ((cfg->pool_map[i].pools >> 32) \
3525 /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3526 if (cfg->enable_loop_back) {
3527 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3528 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3529 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3532 IXGBE_WRITE_FLUSH(hw);
3536 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3537 * @hw: pointer to hardware structure
3540 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3545 PMD_INIT_FUNC_TRACE();
3546 /*PF VF Transmit Enable*/
3547 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3548 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3550 /* Disable the Tx desc arbiter so that MTQC can be changed */
3551 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3552 reg |= IXGBE_RTTDCS_ARBDIS;
3553 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3555 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3556 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3558 /* Disable drop for all queues */
3559 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3560 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3561 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3563 /* Enable the Tx desc arbiter */
3564 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3565 reg &= ~IXGBE_RTTDCS_ARBDIS;
3566 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3568 IXGBE_WRITE_FLUSH(hw);
3573 static int __attribute__((cold))
3574 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3576 struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3580 /* Initialize software ring entries */
3581 for (i = 0; i < rxq->nb_rx_desc; i++) {
3582 volatile union ixgbe_adv_rx_desc *rxd;
3583 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3585 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3586 (unsigned) rxq->queue_id);
3590 rte_mbuf_refcnt_set(mbuf, 1);
3592 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3594 mbuf->port = rxq->port_id;
3597 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3598 rxd = &rxq->rx_ring[i];
3599 rxd->read.hdr_addr = 0;
3600 rxd->read.pkt_addr = dma_addr;
3608 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3610 struct ixgbe_hw *hw;
3613 ixgbe_rss_configure(dev);
3615 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3617 /* MRQC: enable VF RSS */
3618 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3619 mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3620 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3622 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3626 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3630 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3634 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3640 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3642 struct ixgbe_hw *hw =
3643 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3645 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3647 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3652 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3653 IXGBE_MRQC_VMDQRT4TCEN);
3657 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3658 IXGBE_MRQC_VMDQRT8TCEN);
3662 "invalid pool number in IOV mode");
3669 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3671 struct ixgbe_hw *hw =
3672 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3674 if (hw->mac.type == ixgbe_mac_82598EB)
3677 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3679 * SRIOV inactive scheme
3680 * any DCB/RSS w/o VMDq multi-queue setting
3682 switch (dev->data->dev_conf.rxmode.mq_mode) {
3684 case ETH_MQ_RX_DCB_RSS:
3685 case ETH_MQ_RX_VMDQ_RSS:
3686 ixgbe_rss_configure(dev);
3689 case ETH_MQ_RX_VMDQ_DCB:
3690 ixgbe_vmdq_dcb_configure(dev);
3693 case ETH_MQ_RX_VMDQ_ONLY:
3694 ixgbe_vmdq_rx_hw_configure(dev);
3697 case ETH_MQ_RX_NONE:
3699 /* if mq_mode is none, disable rss mode.*/
3700 ixgbe_rss_disable(dev);
3705 * SRIOV active scheme
3706 * Support RSS together with VMDq & SRIOV
3708 switch (dev->data->dev_conf.rxmode.mq_mode) {
3710 case ETH_MQ_RX_VMDQ_RSS:
3711 ixgbe_config_vf_rss(dev);
3714 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3715 case ETH_MQ_RX_VMDQ_DCB:
3716 case ETH_MQ_RX_VMDQ_DCB_RSS:
3718 "Could not support DCB with VMDq & SRIOV");
3721 ixgbe_config_vf_default(dev);
3730 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3732 struct ixgbe_hw *hw =
3733 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3737 if (hw->mac.type == ixgbe_mac_82598EB)
3740 /* disable arbiter before setting MTQC */
3741 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3742 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3743 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3745 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3747 * SRIOV inactive scheme
3748 * any DCB w/o VMDq multi-queue setting
3750 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3751 ixgbe_vmdq_tx_hw_configure(hw);
3753 mtqc = IXGBE_MTQC_64Q_1PB;
3754 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3757 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3760 * SRIOV active scheme
3761 * FIXME if support DCB together with VMDq & SRIOV
3764 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3767 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3770 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3774 mtqc = IXGBE_MTQC_64Q_1PB;
3775 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3777 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3780 /* re-enable arbiter */
3781 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3782 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3788 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3790 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3791 * spec rev. 3.0 chapter 8.2.3.8.13.
3793 * @pool Memory pool of the Rx queue
3795 static inline uint32_t
3796 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3798 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3800 /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
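/* For instance, with 2 KB of packet data per mbuf (the common default
 * data room minus RTE_PKTMBUF_HEADROOM) the quotient is 65535 / 2048 = 31,
 * so MAXDESC_16 is selected below. */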
3803 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3806 return IXGBE_RSCCTL_MAXDESC_16;
3807 else if (maxdesc >= 8)
3808 return IXGBE_RSCCTL_MAXDESC_8;
3809 else if (maxdesc >= 4)
3810 return IXGBE_RSCCTL_MAXDESC_4;
3812 return IXGBE_RSCCTL_MAXDESC_1;
3816 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3819 * (Taken from FreeBSD tree)
3820 * (yes this is all very magic and confusing :)
3823 * @entry the register array entry
3824 * @vector the MSIX vector for this queue
3828 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3830 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3833 vector |= IXGBE_IVAR_ALLOC_VAL;
3835 switch (hw->mac.type) {
3837 case ixgbe_mac_82598EB:
3839 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3841 entry += (type * 64);
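/* Each 82598 IVAR register holds four 8-bit entries:
 * register index = entry / 4, byte lane = entry % 4. */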
3842 index = (entry >> 2) & 0x1F;
3843 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3844 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3845 ivar |= (vector << (8 * (entry & 0x3)));
3846 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3849 case ixgbe_mac_82599EB:
3850 case ixgbe_mac_X540:
3851 if (type == -1) { /* MISC IVAR */
3852 index = (entry & 1) * 8;
3853 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3854 ivar &= ~(0xFF << index);
3855 ivar |= (vector << index);
3856 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3857 } else { /* RX/TX IVARS */
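/* Each IVAR register covers two queues: RX entries (type 0) use
 * bits 7:0 and 23:16, TX entries (type 1) use bits 15:8 and 31:24. */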
3858 index = (16 * (entry & 1)) + (8 * type);
3859 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3860 ivar &= ~(0xFF << index);
3861 ivar |= (vector << index);
3862 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3872 void __attribute__((cold))
3873 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3875 uint16_t i, rx_using_sse;
3876 struct ixgbe_adapter *adapter =
3877 (struct ixgbe_adapter *)dev->data->dev_private;
3880 * In order to allow Vector Rx there are a few configuration
3881 * conditions to be met and Rx Bulk Allocation should be allowed.
3883 if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3884 !adapter->rx_bulk_alloc_allowed) {
3885 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3886 "preconditions or RTE_IXGBE_INC_VECTOR is "
3888 dev->data->port_id);
3890 adapter->rx_vec_allowed = false;
3894 * Initialize the appropriate LRO callback.
3896 * If all queues satisfy the bulk allocation preconditions
3897 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3898 * Otherwise use a single allocation version.
3900 if (dev->data->lro) {
3901 if (adapter->rx_bulk_alloc_allowed) {
3902 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3903 "allocation version");
3904 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3906 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3907 "allocation version");
3908 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3910 } else if (dev->data->scattered_rx) {
3912 * Set the non-LRO scattered callback: there are Vector and
3913 * single allocation versions.
3915 if (adapter->rx_vec_allowed) {
3916 PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3917 "callback (port=%d).",
3918 dev->data->port_id);
3920 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3921 } else if (adapter->rx_bulk_alloc_allowed) {
3922 PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with bulk "
3923 "allocation (port=%d).",
3924 dev->data->port_id);
3925 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3927 PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3928 "single allocation) "
3929 "Scattered Rx callback "
3931 dev->data->port_id);
3933 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3936 * Below we set "simple" callbacks according to port/queues parameters.
3937 * If parameters allow we are going to choose between the following
3941 * - Single buffer allocation (the simplest one)
3943 } else if (adapter->rx_vec_allowed) {
3944 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
3945 "burst size no less than %d (port=%d).",
3946 RTE_IXGBE_DESCS_PER_LOOP,
3947 dev->data->port_id);
3949 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3950 } else if (adapter->rx_bulk_alloc_allowed) {
3951 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3952 "satisfied. Rx Burst Bulk Alloc function "
3953 "will be used on port=%d.",
3954 dev->data->port_id);
3956 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
3958 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3959 "satisfied, or Scattered Rx is requested "
3961 dev->data->port_id);
3963 dev->rx_pkt_burst = ixgbe_recv_pkts;
3966 /* Propagate information about RX function choice through all queues. */
3969 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
3970 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
3972 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3973 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3974 rxq->rx_using_sse = rx_using_sse;
3979 * ixgbe_set_rsc - configure RSC related port HW registers
3981 * Configures the port's RSC related registers according to chapter 4.6.7.2
3982 * of the 82599 Spec (x540 configuration is virtually the same).
3986 * Returns 0 in case of success or a non-zero error code
3989 ixgbe_set_rsc(struct rte_eth_dev *dev)
3991 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3992 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3993 struct rte_eth_dev_info dev_info = { 0 };
3994 bool rsc_capable = false;
3999 dev->dev_ops->dev_infos_get(dev, &dev_info);
4000 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4003 if (!rsc_capable && rx_conf->enable_lro) {
4004 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4009 /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4011 if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4013 * According to chapter 4.6.7.2.1 of the Spec Rev.
4014 * 3.0, RSC requires HW CRC stripping to be
4015 * enabled. If the user requested both HW CRC stripping off
4016 * and RSC on, return an error.
4018 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4023 /* RFCTL configuration */
4025 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4026 if (rx_conf->enable_lro)
4028 * Since NFS packet coalescing is not supported, clear
4029 * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4032 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4033 IXGBE_RFCTL_NFSR_DIS);
4035 rfctl |= IXGBE_RFCTL_RSC_DIS;
4037 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4040 /* If LRO hasn't been requested, we are done here. */
4041 if (!rx_conf->enable_lro)
4044 /* Set RDRXCTL.RSCACKC bit */
4045 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4046 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4047 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4049 /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4050 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4051 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4053 IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4055 IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4057 IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4059 IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4062 * ixgbe PMD doesn't support header-split at the moment.
4064 * Following the 4.6.7.2.1 chapter of the 82599/x540
4065 * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4066 * should be configured even if header split is not
4067 * enabled. We will configure it to 128 bytes following the
4068 * recommendation in the spec.
4070 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4071 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4072 IXGBE_SRRCTL_BSIZEHDR_MASK;
4075 * TODO: Consider setting the Receive Descriptor Minimum
4076 * Threshold Size for the RSC case. This is not an obviously
4077 * beneficial option, but one worth considering...
4080 rscctl |= IXGBE_RSCCTL_RSCEN;
4081 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4082 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4085 * RSC: Set ITR interval corresponding to 2K ints/s.
4087 * Full-sized RSC aggregations for a 10Gb/s link will
4088 * arrive at about 20K aggregation/s rate.
4090 * A 2K ints/s rate will cause only 10% of the
4091 * aggregations to be closed due to interrupt timer
4092 * expiration in the wire-speed streaming case.
4094 * For a sparse streaming case this setting will yield
4095 * at most 500us latency for a single RSC aggregation.
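 * The IXGBE_EITR_INTERVAL_US(500) value written below encodes this
 * 500us interval, i.e. roughly 2000 interrupts per second.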
4097 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4098 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4100 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4101 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4102 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4103 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4106 * RSC requires the mapping of the queue to the
4107 * interrupt vector.
4109 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4114 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4120 * Initializes Receive Unit.
4122 int __attribute__((cold))
4123 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4125 struct ixgbe_hw *hw;
4126 struct ixgbe_rx_queue *rxq;
4137 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4140 PMD_INIT_FUNC_TRACE();
4141 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4144 * Make sure receives are disabled while setting
4145 * up the RX context (registers, descriptor rings, etc.).
4147 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4148 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4150 /* Enable receipt of broadcast frames */
4151 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4152 fctrl |= IXGBE_FCTRL_BAM;
4153 fctrl |= IXGBE_FCTRL_DPF;
4154 fctrl |= IXGBE_FCTRL_PMCF;
4155 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4158 * Configure CRC stripping, if any.
4160 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4161 if (rx_conf->hw_strip_crc)
4162 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4164 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4167 * Configure jumbo frame support, if any.
4169 if (rx_conf->jumbo_frame == 1) {
4170 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4171 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4172 maxfrs &= 0x0000FFFF;
4173 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4174 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
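/*
 * Editorial note: MAXFRS keeps its low 16 bits and carries the maximum
 * frame size in bits 31:16, which is why max_rx_pkt_len is shifted left
 * by 16 above. With an assumed max_rx_pkt_len of 9000 bytes, for example,
 * the hardware would accept jumbo frames up to 9000 bytes.
 */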
4176 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4179 * If loopback mode is configured for 82599, set LPBK bit.
4181 if (hw->mac.type == ixgbe_mac_82599EB &&
4182 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4183 hlreg0 |= IXGBE_HLREG0_LPBK;
4185 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4187 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4189 /* Setup RX queues */
4190 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4191 rxq = dev->data->rx_queues[i];
4194 * Reset crc_len in case it was changed after queue setup by a
4195 * call to configure.
4197 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4199 /* Setup the Base and Length of the Rx Descriptor Rings */
4200 bus_addr = rxq->rx_ring_phys_addr;
4201 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4202 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4203 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4204 (uint32_t)(bus_addr >> 32));
4205 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4206 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4207 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4208 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4210 /* Configure the SRRCTL register */
4211 #ifdef RTE_HEADER_SPLIT_ENABLE
4213 * Configure Header Split
4215 if (rx_conf->header_split) {
4216 if (hw->mac.type == ixgbe_mac_82599EB) {
4217 /* Must setup the PSRTYPE register */
4219 psrtype = IXGBE_PSRTYPE_TCPHDR |
4220 IXGBE_PSRTYPE_UDPHDR |
4221 IXGBE_PSRTYPE_IPV4HDR |
4222 IXGBE_PSRTYPE_IPV6HDR;
4223 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4225 srrctl = ((rx_conf->split_hdr_size <<
4226 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4227 IXGBE_SRRCTL_BSIZEHDR_MASK);
4228 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4231 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4233 /* Set whether packets are dropped when no descriptors are available */
4234 if (rxq->drop_en)
4235 srrctl |= IXGBE_SRRCTL_DROP_EN;
4238 * Configure the RX buffer size in the BSIZEPACKET field of
4239 * the SRRCTL register of the queue.
4240 * The value is in 1 KB resolution. Valid values can be from
4241 * 1 KB to 16 KB.
4243 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4244 RTE_PKTMBUF_HEADROOM);
4245 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4246 IXGBE_SRRCTL_BSIZEPKT_MASK);
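/*
 * Illustrative example (editorial, with an assumed mempool geometry): a
 * pool created with a 2048-byte data room and RTE_PKTMBUF_HEADROOM of 128
 * gives buf_size = 1920; shifting right by IXGBE_SRRCTL_BSIZEPKT_SHIFT
 * (1 KB granularity) programs BSIZEPACKET to 1, so the hardware uses 1 KB
 * receive buffers and the read-back a few lines below recovers 1024 bytes.
 */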
4248 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4250 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4251 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4253 /* Add the length of two VLAN tags to support dual VLAN (QinQ) frames */
4254 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4255 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4256 dev->data->scattered_rx = 1;
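/*
 * Editorial example of the check above, with assumed numbers: a standard
 * 1518-byte max_rx_pkt_len plus 2 * IXGBE_VLAN_TAG_SIZE (8 bytes for a
 * QinQ frame) needs 1526 bytes, so the 1 KB per-buffer size from the
 * example above would force scattered Rx, while a 2 KB buffer would not.
 */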
4259 if (rx_conf->enable_scatter)
4260 dev->data->scattered_rx = 1;
4263 * Device configured with multiple RX queues.
4265 ixgbe_dev_mq_rx_configure(dev);
4268 * Setup the Checksum Register.
4269 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4270 * Enable IP/L4 checksum computation by hardware if requested to do so.
4272 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4273 rxcsum |= IXGBE_RXCSUM_PCSD;
4274 if (rx_conf->hw_ip_checksum)
4275 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4277 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4279 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4281 if (hw->mac.type == ixgbe_mac_82599EB ||
4282 hw->mac.type == ixgbe_mac_X540) {
4283 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4284 if (rx_conf->hw_strip_crc)
4285 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4287 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4288 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4289 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4292 rc = ixgbe_set_rsc(dev);
4296 ixgbe_set_rx_function(dev);
4302 * Initializes Transmit Unit.
4304 void __attribute__((cold))
4305 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4307 struct ixgbe_hw *hw;
4308 struct ixgbe_tx_queue *txq;
4314 PMD_INIT_FUNC_TRACE();
4315 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4317 /* Enable TX CRC (checksum offload requirement) and hw padding
4318 * (TSO requirement) */
4319 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4320 hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4321 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4323 /* Setup the Base and Length of the Tx Descriptor Rings */
4324 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4325 txq = dev->data->tx_queues[i];
4327 bus_addr = txq->tx_ring_phys_addr;
4328 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4329 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4330 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4331 (uint32_t)(bus_addr >> 32));
4332 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4333 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4334 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4335 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4336 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4339 * Disable Tx Head Writeback RO bit, since this hoses
4340 * bookkeeping if things aren't delivered in order.
4342 switch (hw->mac.type) {
4343 case ixgbe_mac_82598EB:
4344 txctrl = IXGBE_READ_REG(hw,
4345 IXGBE_DCA_TXCTRL(txq->reg_idx));
4346 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4347 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4351 case ixgbe_mac_82599EB:
4352 case ixgbe_mac_X540:
4353 case ixgbe_mac_X550:
4354 case ixgbe_mac_X550EM_x:
4356 txctrl = IXGBE_READ_REG(hw,
4357 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4358 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4359 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4365 /* Device configured with multiple TX queues. */
4366 ixgbe_dev_mq_tx_configure(dev);
4370 * Set up link for 82599 loopback mode Tx->Rx.
4372 static inline void __attribute__((cold))
4373 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4375 PMD_INIT_FUNC_TRACE();
4377 if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4378 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4380 PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4389 IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4390 ixgbe_reset_pipeline_82599(hw);
4392 hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4398 * Start Transmit and Receive Units.
4400 int __attribute__((cold))
4401 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4403 struct ixgbe_hw *hw;
4404 struct ixgbe_tx_queue *txq;
4405 struct ixgbe_rx_queue *rxq;
4412 PMD_INIT_FUNC_TRACE();
4413 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4415 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4416 txq = dev->data->tx_queues[i];
4417 /* Setup Transmit Threshold Registers */
4418 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4419 txdctl |= txq->pthresh & 0x7F;
4420 txdctl |= ((txq->hthresh & 0x7F) << 8);
4421 txdctl |= ((txq->wthresh & 0x7F) << 16);
4422 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
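/*
 * Editorial sketch of the TXDCTL value built above (field positions as
 * implied by the shifts in this code): PTHRESH lands in bits 6:0,
 * HTHRESH in bits 14:8 and WTHRESH in bits 22:16, so an assumed
 * pthresh=32, hthresh=0, wthresh=0 simply ORs 0x20 into the register.
 */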
4425 if (hw->mac.type != ixgbe_mac_82598EB) {
4426 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4427 dmatxctl |= IXGBE_DMATXCTL_TE;
4428 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4431 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4432 txq = dev->data->tx_queues[i];
4433 if (!txq->tx_deferred_start) {
4434 ret = ixgbe_dev_tx_queue_start(dev, i);
4440 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4441 rxq = dev->data->rx_queues[i];
4442 if (!rxq->rx_deferred_start) {
4443 ret = ixgbe_dev_rx_queue_start(dev, i);
4449 /* Enable Receive engine */
4450 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4451 if (hw->mac.type == ixgbe_mac_82598EB)
4452 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4453 rxctrl |= IXGBE_RXCTRL_RXEN;
4454 hw->mac.ops.enable_rx_dma(hw, rxctrl);
4456 /* If loopback mode is enabled for 82599, set up the link accordingly */
4457 if (hw->mac.type == ixgbe_mac_82599EB &&
4458 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4459 ixgbe_setup_loopback_link_82599(hw);
4465 * Start Receive Units for the specified queue.
4467 int __attribute__((cold))
4468 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4470 struct ixgbe_hw *hw;
4471 struct ixgbe_rx_queue *rxq;
4475 PMD_INIT_FUNC_TRACE();
4476 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4478 if (rx_queue_id < dev->data->nb_rx_queues) {
4479 rxq = dev->data->rx_queues[rx_queue_id];
4481 /* Allocate buffers for descriptor rings */
4482 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4483 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4487 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4488 rxdctl |= IXGBE_RXDCTL_ENABLE;
4489 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4491 /* Wait until RX Enable ready */
4492 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4495 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4496 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4498 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4501 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4502 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
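/*
 * Editorial note: head is reset to 0 while the tail is written to
 * nb_rx_desc - 1, handing all but one descriptor to the hardware;
 * keeping one slot back is the usual way to tell a full ring apart
 * from an empty one.
 */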
4503 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4511 * Stop Receive Units for the specified queue.
4513 int __attribute__((cold))
4514 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4516 struct ixgbe_hw *hw;
4517 struct ixgbe_adapter *adapter =
4518 (struct ixgbe_adapter *)dev->data->dev_private;
4519 struct ixgbe_rx_queue *rxq;
4523 PMD_INIT_FUNC_TRACE();
4524 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4526 if (rx_queue_id < dev->data->nb_rx_queues) {
4527 rxq = dev->data->rx_queues[rx_queue_id];
4529 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4530 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4531 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4533 /* Wait until RX Enable ready */
4534 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4537 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4538 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4540 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4543 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4545 ixgbe_rx_queue_release_mbufs(rxq);
4546 ixgbe_reset_rx_queue(adapter, rxq);
4547 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4556 * Start Transmit Units for the specified queue.
4558 int __attribute__((cold))
4559 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4561 struct ixgbe_hw *hw;
4562 struct ixgbe_tx_queue *txq;
4566 PMD_INIT_FUNC_TRACE();
4567 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4569 if (tx_queue_id < dev->data->nb_tx_queues) {
4570 txq = dev->data->tx_queues[tx_queue_id];
4571 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4572 txdctl |= IXGBE_TXDCTL_ENABLE;
4573 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4575 /* Wait until TX Enable ready */
4576 if (hw->mac.type == ixgbe_mac_82599EB) {
4577 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4580 txdctl = IXGBE_READ_REG(hw,
4581 IXGBE_TXDCTL(txq->reg_idx));
4582 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4584 PMD_INIT_LOG(ERR, "Could not enable "
4585 "Tx Queue %d", tx_queue_id);
4588 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4589 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4590 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4598 * Stop Transmit Units for the specified queue.
4600 int __attribute__((cold))
4601 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4603 struct ixgbe_hw *hw;
4604 struct ixgbe_tx_queue *txq;
4606 uint32_t txtdh, txtdt;
4609 PMD_INIT_FUNC_TRACE();
4610 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4612 if (tx_queue_id < dev->data->nb_tx_queues) {
4613 txq = dev->data->tx_queues[tx_queue_id];
4615 /* Wait until TX queue is empty */
4616 if (hw->mac.type == ixgbe_mac_82599EB) {
4617 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4619 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4620 txtdh = IXGBE_READ_REG(hw,
4621 IXGBE_TDH(txq->reg_idx));
4622 txtdt = IXGBE_READ_REG(hw,
4623 IXGBE_TDT(txq->reg_idx));
4624 } while (--poll_ms && (txtdh != txtdt));
4626 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4627 "when stopping.", tx_queue_id);
4630 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4631 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4632 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4634 /* Wait until TX Enable ready */
4635 if (hw->mac.type == ixgbe_mac_82599EB) {
4636 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4639 txdctl = IXGBE_READ_REG(hw,
4640 IXGBE_TXDCTL(txq->reg_idx));
4641 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4643 PMD_INIT_LOG(ERR, "Could not disable "
4644 "Tx Queue %d", tx_queue_id);
4647 if (txq->ops != NULL) {
4648 txq->ops->release_mbufs(txq);
4649 txq->ops->reset(txq);
4651 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4659 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4660 struct rte_eth_rxq_info *qinfo)
4662 struct ixgbe_rx_queue *rxq;
4664 rxq = dev->data->rx_queues[queue_id];
4666 qinfo->mp = rxq->mb_pool;
4667 qinfo->scattered_rx = dev->data->scattered_rx;
4668 qinfo->nb_desc = rxq->nb_rx_desc;
4670 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4671 qinfo->conf.rx_drop_en = rxq->drop_en;
4672 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4676 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4677 struct rte_eth_txq_info *qinfo)
4679 struct ixgbe_tx_queue *txq;
4681 txq = dev->data->tx_queues[queue_id];
4683 qinfo->nb_desc = txq->nb_tx_desc;
4685 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4686 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4687 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4689 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4690 qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4691 qinfo->conf.txq_flags = txq->txq_flags;
4692 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4696 * [VF] Initializes Receive Unit.
4698 int __attribute__((cold))
4699 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4701 struct ixgbe_hw *hw;
4702 struct ixgbe_rx_queue *rxq;
4704 uint32_t srrctl, psrtype = 0;
4709 PMD_INIT_FUNC_TRACE();
4710 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4712 if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4713 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4714 "it should be a power of 2");
4718 if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4719 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4720 "it should be equal to or less than %d",
4721 hw->mac.max_rx_queues);
4726 * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4727 * disables VF receipt of packets if the PF MTU is > 1500.
4728 * This is done to deal with an 82599 limitation that forces
4729 * the PF and all VFs to share the same MTU.
4730 * The PF driver re-enables VF receipt of packets only when
4731 * the VF driver issues an IXGBE_VF_SET_LPE request.
4732 * In the meantime, the VF device cannot be used, even if the VF driver
4733 * and the Guest VM network stack are ready to accept packets with a
4734 * size up to the PF MTU.
4735 * As a work-around for this PF behaviour, force the call to
4736 * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4737 * reception of VF packets works in all cases.
4739 ixgbevf_rlpml_set_vf(hw,
4740 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
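/*
 * Editorial note: ixgbevf_rlpml_set_vf() asks the PF to raise this VF's
 * maximum receive packet length to max_rx_pkt_len, presumably via the
 * IXGBE_VF_SET_LPE request mentioned above; it is issued unconditionally
 * so that VF reception also recovers when jumbo frames are not in use.
 */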
4742 /* Setup RX queues */
4743 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4744 rxq = dev->data->rx_queues[i];
4746 /* Allocate buffers for descriptor rings */
4747 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4751 /* Setup the Base and Length of the Rx Descriptor Rings */
4752 bus_addr = rxq->rx_ring_phys_addr;
4754 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4755 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4756 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4757 (uint32_t)(bus_addr >> 32));
4758 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4759 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4760 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4761 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4764 /* Configure the SRRCTL register */
4765 #ifdef RTE_HEADER_SPLIT_ENABLE
4767 * Configure Header Split
4769 if (dev->data->dev_conf.rxmode.header_split) {
4770 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4771 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4772 IXGBE_SRRCTL_BSIZEHDR_MASK);
4773 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4776 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4778 /* Set whether packets are dropped when no descriptors are available */
4779 if (rxq->drop_en)
4780 srrctl |= IXGBE_SRRCTL_DROP_EN;
4783 * Configure the RX buffer size in the BSIZEPACKET field of
4784 * the SRRCTL register of the queue.
4785 * The value is in 1 KB resolution. Valid values can be from
4786 * 1 KB to 16 KB.
4788 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4789 RTE_PKTMBUF_HEADROOM);
4790 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4791 IXGBE_SRRCTL_BSIZEPKT_MASK);
4794 * VF modification to write virtual function SRRCTL register
4796 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4798 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4799 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4801 if (dev->data->dev_conf.rxmode.enable_scatter ||
4802 /* Add the length of two VLAN tags to support dual VLAN (QinQ) frames */
4803 (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4804 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4805 if (!dev->data->scattered_rx)
4806 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4807 dev->data->scattered_rx = 1;
4811 #ifdef RTE_HEADER_SPLIT_ENABLE
4812 if (dev->data->dev_conf.rxmode.header_split)
4813 /* Must setup the PSRTYPE register */
4814 psrtype = IXGBE_PSRTYPE_TCPHDR |
4815 IXGBE_PSRTYPE_UDPHDR |
4816 IXGBE_PSRTYPE_IPV4HDR |
4817 IXGBE_PSRTYPE_IPV6HDR;
4820 /* Set RQPL for VF RSS according to the maximum number of Rx queues */
4821 psrtype |= (dev->data->nb_rx_queues >> 1) <<
4822 IXGBE_PSRTYPE_RQPL_SHIFT;
4823 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
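/*
 * Editorial example (encoding implied by the shift above): with 4 VF Rx
 * queues, nb_rx_queues >> 1 = 2 is written into the RQPL field, and with
 * 2 queues the field becomes 1; the power-of-2 check at the top of this
 * function keeps this mapping well defined.
 */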
4825 ixgbe_set_rx_function(dev);
4831 * [VF] Initializes Transmit Unit.
4833 void __attribute__((cold))
4834 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4836 struct ixgbe_hw *hw;
4837 struct ixgbe_tx_queue *txq;
4842 PMD_INIT_FUNC_TRACE();
4843 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4845 /* Setup the Base and Length of the Tx Descriptor Rings */
4846 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4847 txq = dev->data->tx_queues[i];
4848 bus_addr = txq->tx_ring_phys_addr;
4849 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4850 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4851 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4852 (uint32_t)(bus_addr >> 32));
4853 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4854 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4855 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4856 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4857 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4860 * Disable Tx Head Writeback RO bit, since this hoses
4861 * bookkeeping if things aren't delivered in order.
4863 txctrl = IXGBE_READ_REG(hw,
4864 IXGBE_VFDCA_TXCTRL(i));
4865 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4866 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4872 * [VF] Start Transmit and Receive Units.
4874 void __attribute__((cold))
4875 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4877 struct ixgbe_hw *hw;
4878 struct ixgbe_tx_queue *txq;
4879 struct ixgbe_rx_queue *rxq;
4885 PMD_INIT_FUNC_TRACE();
4886 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4888 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4889 txq = dev->data->tx_queues[i];
4890 /* Setup Transmit Threshold Registers */
4891 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4892 txdctl |= txq->pthresh & 0x7F;
4893 txdctl |= ((txq->hthresh & 0x7F) << 8);
4894 txdctl |= ((txq->wthresh & 0x7F) << 16);
4895 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4898 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4900 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4901 txdctl |= IXGBE_TXDCTL_ENABLE;
4902 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4905 /* Wait until TX Enable ready */
4908 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4909 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4911 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4913 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4915 rxq = dev->data->rx_queues[i];
4917 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4918 rxdctl |= IXGBE_RXDCTL_ENABLE;
4919 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4921 /* Wait until RX Enable ready */
4925 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4926 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4928 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4930 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4935 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4936 int __attribute__((weak))
4937 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4942 uint16_t __attribute__((weak))
4943 ixgbe_recv_pkts_vec(
4944 void __rte_unused *rx_queue,
4945 struct rte_mbuf __rte_unused **rx_pkts,
4946 uint16_t __rte_unused nb_pkts)
4951 uint16_t __attribute__((weak))
4952 ixgbe_recv_scattered_pkts_vec(
4953 void __rte_unused *rx_queue,
4954 struct rte_mbuf __rte_unused **rx_pkts,
4955 uint16_t __rte_unused nb_pkts)
4960 int __attribute__((weak))
4961 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)