4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
83 /* Bit mask indicating which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK ( \
90 static inline struct rte_mbuf *
91 rte_rxmbuf_alloc(struct rte_mempool *mp)
95 m = __rte_mbuf_raw_alloc(mp);
96 __rte_mbuf_sanity_check_raw(m, 0);
102 #define RTE_PMD_USE_PREFETCH
105 #ifdef RTE_PMD_USE_PREFETCH
107 * Prefetch a cache line into all cache levels.
109 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
111 #define rte_ixgbe_prefetch(p) do {} while (0)
114 /*********************************************************************
118 **********************************************************************/
121 * Check for descriptors with their DD bit set and free mbufs.
122 * Return the total number of buffers freed.
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
127 struct ixgbe_tx_entry *txep;
131 /* check DD bit on threshold descriptor */
132 status = txq->tx_ring[txq->tx_next_dd].wb.status;
133 if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
137 * first buffer to free from S/W ring is at index
138 * tx_next_dd - (tx_rs_thresh-1)
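 * Illustrative example (values chosen for explanation only): with
 * tx_rs_thresh = 32 and tx_next_dd = 31, the first entry freed is
 * sw_ring[0], and the 32 entries sw_ring[0..31] correspond to the
 * descriptors covered by this DD check.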
140 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142 /* free buffers one at a time */
143 if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
144 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
145 txep->mbuf->next = NULL;
146 rte_mempool_put(txep->mbuf->pool, txep->mbuf);
150 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
151 rte_pktmbuf_free_seg(txep->mbuf);
156 /* buffers were freed, update counters */
157 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
158 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
159 if (txq->tx_next_dd >= txq->nb_tx_desc)
160 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
162 return txq->tx_rs_thresh;
165 /* Populate 4 descriptors with data from 4 mbufs */
167 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
169 uint64_t buf_dma_addr;
173 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
174 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
175 pkt_len = (*pkts)->data_len;
177 /* write data to descriptor */
178 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
180 txdp->read.cmd_type_len =
181 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
183 txdp->read.olinfo_status =
184 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
186 rte_prefetch0(&(*pkts)->pool);
190 /* Populate 1 descriptor with data from 1 mbuf */
192 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
194 uint64_t buf_dma_addr;
197 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
198 pkt_len = (*pkts)->data_len;
200 /* write data to descriptor */
201 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
202 txdp->read.cmd_type_len =
203 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
204 txdp->read.olinfo_status =
205 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
206 rte_prefetch0(&(*pkts)->pool);
210 * Fill H/W descriptor ring with mbuf data.
211 * Copy mbuf pointers to the S/W ring.
214 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
217 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
218 struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
219 const int N_PER_LOOP = 4;
220 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
221 int mainpart, leftover;
225 * Process most of the packets in chunks of N pkts. Any
226 * leftover packets will get processed one at a time.
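 * Example (illustrative values): nb_pkts = 13 with N_PER_LOOP = 4 gives
 * mainpart = 12 (three tx4() calls) and leftover = 1 (one tx1() call).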
228 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
229 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
230 for (i = 0; i < mainpart; i += N_PER_LOOP) {
231 /* Copy N mbuf pointers to the S/W ring */
232 for (j = 0; j < N_PER_LOOP; ++j) {
233 (txep + i + j)->mbuf = *(pkts + i + j);
235 tx4(txdp + i, pkts + i);
238 if (unlikely(leftover > 0)) {
239 for (i = 0; i < leftover; ++i) {
240 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
241 tx1(txdp + mainpart + i, pkts + mainpart + i);
246 static inline uint16_t
247 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
250 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
251 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
255 * Begin scanning the H/W ring for done descriptors when the
256 * number of available descriptors drops below tx_free_thresh. For
257 * each done descriptor, free the associated buffer.
259 if (txq->nb_tx_free < txq->tx_free_thresh)
260 ixgbe_tx_free_bufs(txq);
262 /* Only use descriptors that are available */
263 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
264 if (unlikely(nb_pkts == 0))
267 /* Use exactly nb_pkts descriptors */
268 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
271 * At this point, we know there are enough descriptors in the
272 * ring to transmit all the packets. This assumes that each
273 * mbuf contains a single segment, and that no new offloads
274 * are expected, which would require a new context descriptor.
278 * See if we're going to wrap around. If so, handle the top
279 * of the descriptor ring first, then do the bottom. If not,
280 * the processing looks just like the "bottom" part anyway...
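 * Illustrative example (assumed values): with nb_tx_desc = 512,
 * tx_tail = 500 and nb_pkts = 20, n = 12 descriptors are filled at the
 * top of the ring, tx_tail wraps to 0, and the remaining 8 descriptors
 * are filled from index 0, leaving tx_tail = 8.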
282 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
283 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
284 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
287 * We know that the last descriptor in the ring will need to
288 * have its RS bit set because tx_rs_thresh has to be
289 * a divisor of the ring size
291 tx_r[txq->tx_next_rs].read.cmd_type_len |=
292 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
293 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
298 /* Fill H/W descriptor ring with mbuf data */
299 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
300 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
303 * Determine if RS bit should be set
304 * This is what we actually want:
305 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
306 * but instead of subtracting 1 and doing >=, we can just do
307 * greater than without subtracting.
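 * Illustrative example (assuming tx_rs_thresh = 32): if tx_next_rs = 31
 * and tx_tail has advanced to 40, the RS bit is set on descriptor 31 and
 * tx_next_rs moves forward to 63.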
309 if (txq->tx_tail > txq->tx_next_rs) {
310 tx_r[txq->tx_next_rs].read.cmd_type_len |=
311 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
312 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
314 if (txq->tx_next_rs >= txq->nb_tx_desc)
315 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
319 * Check for wrap-around. This would only happen if we used
320 * up to the last descriptor in the ring, no more, no less.
322 if (txq->tx_tail >= txq->nb_tx_desc)
325 /* update tail pointer */
327 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
333 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
338 /* Fast path: transmit the whole burst in one call if it is no larger than TX_MAX_BURST pkts */
339 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
340 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
342 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
346 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
347 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
348 nb_tx = (uint16_t)(nb_tx + ret);
349 nb_pkts = (uint16_t)(nb_pkts - ret);
358 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
359 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
360 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
362 uint32_t type_tucmd_mlhl;
363 uint32_t mss_l4len_idx = 0;
365 uint32_t vlan_macip_lens;
366 union ixgbe_tx_offload tx_offload_mask;
368 ctx_idx = txq->ctx_curr;
369 tx_offload_mask.data = 0;
372 /* Specify which HW CTX to upload. */
373 mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
375 if (ol_flags & PKT_TX_VLAN_PKT) {
376 tx_offload_mask.vlan_tci |= ~0;
379 /* check if TCP segmentation is required for this packet */
380 if (ol_flags & PKT_TX_TCP_SEG) {
381 /* implies IP cksum in IPv4 */
382 if (ol_flags & PKT_TX_IP_CKSUM)
383 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
387 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391 tx_offload_mask.l2_len |= ~0;
392 tx_offload_mask.l3_len |= ~0;
393 tx_offload_mask.l4_len |= ~0;
394 tx_offload_mask.tso_segsz |= ~0;
395 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397 } else { /* no TSO, check if hardware checksum is needed */
398 if (ol_flags & PKT_TX_IP_CKSUM) {
399 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400 tx_offload_mask.l2_len |= ~0;
401 tx_offload_mask.l3_len |= ~0;
404 switch (ol_flags & PKT_TX_L4_MASK) {
405 case PKT_TX_UDP_CKSUM:
406 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409 tx_offload_mask.l2_len |= ~0;
410 tx_offload_mask.l3_len |= ~0;
412 case PKT_TX_TCP_CKSUM:
413 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416 tx_offload_mask.l2_len |= ~0;
417 tx_offload_mask.l3_len |= ~0;
419 case PKT_TX_SCTP_CKSUM:
420 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423 tx_offload_mask.l2_len |= ~0;
424 tx_offload_mask.l3_len |= ~0;
427 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433 txq->ctx_cache[ctx_idx].flags = ol_flags;
434 txq->ctx_cache[ctx_idx].tx_offload.data =
435 tx_offload_mask.data & tx_offload.data;
436 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
438 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
439 vlan_macip_lens = tx_offload.l3_len;
440 vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
441 vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
442 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
443 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
444 ctx_txd->seqnum_seed = 0;
448 * Check which hardware context can be used. Use the existing match
449 * or create a new context descriptor.
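 * Note (added for clarity): the driver caches IXGBE_CTX_NUM context
 * entries; when neither cached entry matches, IXGBE_CTX_NUM itself is
 * returned so the caller knows a new context descriptor must be written.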
451 static inline uint32_t
452 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
453 union ixgbe_tx_offload tx_offload)
455 /* Check whether it matches the currently used context */
456 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
457 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
458 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
459 return txq->ctx_curr;
462 /* Otherwise, check whether it matches the other cached context */
464 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
465 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
466 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
467 return txq->ctx_curr;
470 /* Mismatch: a new context descriptor will have to be built */
471 return IXGBE_CTX_NUM;
474 static inline uint32_t
475 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
478 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
479 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
480 if (ol_flags & PKT_TX_IP_CKSUM)
481 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
482 if (ol_flags & PKT_TX_TCP_SEG)
483 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
487 static inline uint32_t
488 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
490 uint32_t cmdtype = 0;
491 if (ol_flags & PKT_TX_VLAN_PKT)
492 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
493 if (ol_flags & PKT_TX_TCP_SEG)
494 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
498 /* Default RS bit threshold values */
499 #ifndef DEFAULT_TX_RS_THRESH
500 #define DEFAULT_TX_RS_THRESH 32
502 #ifndef DEFAULT_TX_FREE_THRESH
503 #define DEFAULT_TX_FREE_THRESH 32
506 /* Reset transmit descriptors after they have been used */
508 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
510 struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
511 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
512 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
513 uint16_t nb_tx_desc = txq->nb_tx_desc;
514 uint16_t desc_to_clean_to;
515 uint16_t nb_tx_to_clean;
518 /* Determine the last descriptor needing to be cleaned */
519 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
520 if (desc_to_clean_to >= nb_tx_desc)
521 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
523 /* Check to make sure the last descriptor to clean is done */
524 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
525 status = txr[desc_to_clean_to].wb.status;
526 if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD)))
528 PMD_TX_FREE_LOG(DEBUG,
529 "TX descriptor %4u is not done"
530 "(port=%d queue=%d)",
532 txq->port_id, txq->queue_id);
533 /* Failed to clean any descriptors, better luck next time */
537 /* Figure out how many descriptors will be cleaned */
538 if (last_desc_cleaned > desc_to_clean_to)
539 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
542 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
545 PMD_TX_FREE_LOG(DEBUG,
546 "Cleaning %4u TX descriptors: %4u to %4u "
547 "(port=%d queue=%d)",
548 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
549 txq->port_id, txq->queue_id);
552 * The last descriptor to clean is done, so that means all the
553 * descriptors from the last descriptor that was cleaned
554 * up to the last descriptor with the RS bit set
555 * are done. Only reset the threshold descriptor.
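 * Worked example (illustrative, assuming tx_rs_thresh = 32): with
 * last_desc_cleaned = 95, desc_to_clean_to becomes 127 (then adjusted to
 * that entry's last_id); if that descriptor reports DD, the 32
 * descriptors 96..127 are considered done and nb_tx_free grows by 32.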
557 txr[desc_to_clean_to].wb.status = 0;
559 /* Update the txq to reflect the last descriptor that was cleaned */
560 txq->last_desc_cleaned = desc_to_clean_to;
561 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
568 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
571 struct ixgbe_tx_queue *txq;
572 struct ixgbe_tx_entry *sw_ring;
573 struct ixgbe_tx_entry *txe, *txn;
574 volatile union ixgbe_adv_tx_desc *txr;
575 volatile union ixgbe_adv_tx_desc *txd, *txp;
576 struct rte_mbuf *tx_pkt;
577 struct rte_mbuf *m_seg;
578 uint64_t buf_dma_addr;
579 uint32_t olinfo_status;
580 uint32_t cmd_type_len;
591 union ixgbe_tx_offload tx_offload = {0};
594 sw_ring = txq->sw_ring;
596 tx_id = txq->tx_tail;
597 txe = &sw_ring[tx_id];
600 /* Determine if the descriptor ring needs to be cleaned. */
601 if (txq->nb_tx_free < txq->tx_free_thresh)
602 ixgbe_xmit_cleanup(txq);
604 rte_prefetch0(&txe->mbuf->pool);
607 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
610 pkt_len = tx_pkt->pkt_len;
613 * Determine how many (if any) context descriptors
614 * are needed for offload functionality.
616 ol_flags = tx_pkt->ol_flags;
618 /* If hardware offload required */
619 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
621 tx_offload.l2_len = tx_pkt->l2_len;
622 tx_offload.l3_len = tx_pkt->l3_len;
623 tx_offload.l4_len = tx_pkt->l4_len;
624 tx_offload.vlan_tci = tx_pkt->vlan_tci;
625 tx_offload.tso_segsz = tx_pkt->tso_segsz;
627 /* Decide whether a new context descriptor must be built or an existing one reused. */
628 ctx = what_advctx_update(txq, tx_ol_req,
630 /* Only allocate a context descriptor if required */
631 new_ctx = (ctx == IXGBE_CTX_NUM);
636 * Keep track of how many descriptors are used this loop
637 * This will always be the number of segments + the number of
638 * Context descriptors required to transmit the packet
640 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
643 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
644 /* set RS on the previous packet in the burst */
645 txp->read.cmd_type_len |=
646 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
649 * The number of descriptors that must be allocated for a
650 * packet is the number of segments of that packet, plus 1
651 * Context Descriptor for the hardware offload, if any.
652 * Determine the last TX descriptor to allocate in the TX ring
653 * for the packet, starting from the current position (tx_id)
656 tx_last = (uint16_t) (tx_id + nb_used - 1);
659 if (tx_last >= txq->nb_tx_desc)
660 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
662 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
663 " tx_first=%u tx_last=%u",
664 (unsigned) txq->port_id,
665 (unsigned) txq->queue_id,
671 * Make sure there are enough TX descriptors available to
672 * transmit the entire packet.
673 * nb_used better be less than or equal to txq->tx_rs_thresh
675 if (nb_used > txq->nb_tx_free) {
676 PMD_TX_FREE_LOG(DEBUG,
677 "Not enough free TX descriptors "
678 "nb_used=%4u nb_free=%4u "
679 "(port=%d queue=%d)",
680 nb_used, txq->nb_tx_free,
681 txq->port_id, txq->queue_id);
683 if (ixgbe_xmit_cleanup(txq) != 0) {
684 /* Could not clean any descriptors */
690 /* nb_used better be <= txq->tx_rs_thresh */
691 if (unlikely(nb_used > txq->tx_rs_thresh)) {
692 PMD_TX_FREE_LOG(DEBUG,
693 "The number of descriptors needed to "
694 "transmit the packet exceeds the "
695 "RS bit threshold. This will impact "
697 "nb_used=%4u nb_free=%4u "
699 "(port=%d queue=%d)",
700 nb_used, txq->nb_tx_free,
702 txq->port_id, txq->queue_id);
704 * Loop here until there are enough TX
705 * descriptors or until the ring cannot be
708 while (nb_used > txq->nb_tx_free) {
709 if (ixgbe_xmit_cleanup(txq) != 0) {
711 * Could not clean any
723 * By now there are enough free TX descriptors to transmit
728 * Set common flags of all TX Data Descriptors.
730 * The following bits must be set in all Data Descriptors:
731 * - IXGBE_ADVTXD_DTYP_DATA
732 * - IXGBE_ADVTXD_DCMD_DEXT
734 * The following bits must be set in the first Data Descriptor
735 * and are ignored in the other ones:
736 * - IXGBE_ADVTXD_DCMD_IFCS
737 * - IXGBE_ADVTXD_MAC_1588
738 * - IXGBE_ADVTXD_DCMD_VLE
740 * The following bits must only be set in the last Data
742 * - IXGBE_TXD_CMD_EOP
744 * The following bits can be set in any Data Descriptor, but
745 * are only set in the last Data Descriptor:
748 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
749 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
751 #ifdef RTE_LIBRTE_IEEE1588
752 if (ol_flags & PKT_TX_IEEE1588_TMST)
753 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
759 if (ol_flags & PKT_TX_TCP_SEG) {
760 /* when TSO is on, the paylen in the descriptor is not the
761 * packet length but the TCP payload length */
762 pkt_len -= (tx_offload.l2_len +
763 tx_offload.l3_len + tx_offload.l4_len);
767 * Setup the TX Advanced Context Descriptor if required
770 volatile struct ixgbe_adv_tx_context_desc *
773 ctx_txd = (volatile struct
774 ixgbe_adv_tx_context_desc *)
777 txn = &sw_ring[txe->next_id];
778 rte_prefetch0(&txn->mbuf->pool);
780 if (txe->mbuf != NULL) {
781 rte_pktmbuf_free_seg(txe->mbuf);
785 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
788 txe->last_id = tx_last;
789 tx_id = txe->next_id;
794 * Set up the TX Advanced Data Descriptor.
795 * This path is taken whether a new context descriptor
796 * was just written or an existing one is being reused.
798 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
799 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
800 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
803 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
808 txn = &sw_ring[txe->next_id];
809 rte_prefetch0(&txn->mbuf->pool);
811 if (txe->mbuf != NULL)
812 rte_pktmbuf_free_seg(txe->mbuf);
816 * Set up Transmit Data Descriptor.
818 slen = m_seg->data_len;
819 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
820 txd->read.buffer_addr =
821 rte_cpu_to_le_64(buf_dma_addr);
822 txd->read.cmd_type_len =
823 rte_cpu_to_le_32(cmd_type_len | slen);
824 txd->read.olinfo_status =
825 rte_cpu_to_le_32(olinfo_status);
826 txe->last_id = tx_last;
827 tx_id = txe->next_id;
830 } while (m_seg != NULL);
833 * The last packet data descriptor needs End Of Packet (EOP)
835 cmd_type_len |= IXGBE_TXD_CMD_EOP;
836 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
837 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
839 /* Set RS bit only on threshold packets' last descriptor */
840 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
841 PMD_TX_FREE_LOG(DEBUG,
842 "Setting RS bit on TXD id="
843 "%4u (port=%d queue=%d)",
844 tx_last, txq->port_id, txq->queue_id);
846 cmd_type_len |= IXGBE_TXD_CMD_RS;
848 /* Update txq RS bit counters */
854 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
858 /* set RS on last packet in the burst */
860 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
865 * Set the Transmit Descriptor Tail (TDT)
867 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
868 (unsigned) txq->port_id, (unsigned) txq->queue_id,
869 (unsigned) tx_id, (unsigned) nb_tx);
870 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
871 txq->tx_tail = tx_id;
876 /*********************************************************************
880 **********************************************************************/
881 #define IXGBE_PACKET_TYPE_IPV4 0X01
882 #define IXGBE_PACKET_TYPE_IPV4_TCP 0X11
883 #define IXGBE_PACKET_TYPE_IPV4_UDP 0X21
884 #define IXGBE_PACKET_TYPE_IPV4_SCTP 0X41
885 #define IXGBE_PACKET_TYPE_IPV4_EXT 0X03
886 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP 0X43
887 #define IXGBE_PACKET_TYPE_IPV6 0X04
888 #define IXGBE_PACKET_TYPE_IPV6_TCP 0X14
889 #define IXGBE_PACKET_TYPE_IPV6_UDP 0X24
890 #define IXGBE_PACKET_TYPE_IPV6_EXT 0X0C
891 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP 0X1C
892 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP 0X2C
893 #define IXGBE_PACKET_TYPE_IPV4_IPV6 0X05
894 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP 0X15
895 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP 0X25
896 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
897 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
898 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
899 #define IXGBE_PACKET_TYPE_MAX 0X80
900 #define IXGBE_PACKET_TYPE_MASK 0X7F
901 #define IXGBE_PACKET_TYPE_SHIFT 0X04
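/*
 * Note (added for clarity): the table below is indexed with
 * (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & IXGBE_PACKET_TYPE_MASK, so an
 * IPv4/TCP descriptor, for example, resolves to index
 * IXGBE_PACKET_TYPE_IPV4_TCP (0x11).
 */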
902 static inline uint32_t
903 ixgbe_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
905 static const uint32_t
906 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
907 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
909 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
910 RTE_PTYPE_L3_IPV4_EXT,
911 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
913 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
914 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
915 RTE_PTYPE_INNER_L3_IPV6,
916 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
917 RTE_PTYPE_L3_IPV6_EXT,
918 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
919 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
920 RTE_PTYPE_INNER_L3_IPV6_EXT,
921 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
922 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
923 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
924 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
925 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
926 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
927 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
928 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
929 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
930 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
931 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
932 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
933 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
934 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
935 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
936 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
937 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
938 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
939 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
940 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
941 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
942 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
943 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
944 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
945 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
946 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
947 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
948 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
950 if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
951 return RTE_PTYPE_UNKNOWN;
953 pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) &
954 IXGBE_PACKET_TYPE_MASK;
956 return ptype_table[pkt_info];
959 static inline uint64_t
960 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
962 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
963 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
964 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
965 PKT_RX_RSS_HASH, 0, 0, 0,
966 0, 0, 0, PKT_RX_FDIR,
968 #ifdef RTE_LIBRTE_IEEE1588
969 static uint64_t ip_pkt_etqf_map[8] = {
970 0, 0, 0, PKT_RX_IEEE1588_PTP,
974 if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
975 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
976 ip_rss_types_map[pkt_info & 0XF];
978 return ip_rss_types_map[pkt_info & 0XF];
980 return ip_rss_types_map[pkt_info & 0XF];
984 static inline uint64_t
985 rx_desc_status_to_pkt_flags(uint32_t rx_status)
990 * Check only whether a VLAN is present.
991 * Do not check here whether the L3/L4 RX checksum was verified by the
992 * NIC; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
994 pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
996 #ifdef RTE_LIBRTE_IEEE1588
997 if (rx_status & IXGBE_RXD_STAT_TMST)
998 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1003 static inline uint64_t
1004 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1007 * Bit 31: IPE, IPv4 checksum error
1008 * Bit 30: L4I, L4 integrity error
1010 static uint64_t error_to_pkt_flags_map[4] = {
1011 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1012 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1014 return error_to_pkt_flags_map[(rx_status >>
1015 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
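/*
 * Example (added for illustration): a status word with both the IPE bit
 * (31) and the L4 integrity bit (30) set indexes entry 3 of
 * error_to_pkt_flags_map, yielding
 * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD.
 */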
1019 * LOOK_AHEAD defines how many desc statuses to check beyond the
1020 * current descriptor.
1021 * It must be a pound define for optimal performance.
1022 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1023 * function only works with LOOK_AHEAD=8.
1025 #define LOOK_AHEAD 8
1026 #if (LOOK_AHEAD != 8)
1027 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1030 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1032 volatile union ixgbe_adv_rx_desc *rxdp;
1033 struct ixgbe_rx_entry *rxep;
1034 struct rte_mbuf *mb;
1038 uint32_t s[LOOK_AHEAD];
1039 uint16_t pkt_info[LOOK_AHEAD];
1040 int i, j, nb_rx = 0;
1043 /* get references to current descriptor and S/W ring entry */
1044 rxdp = &rxq->rx_ring[rxq->rx_tail];
1045 rxep = &rxq->sw_ring[rxq->rx_tail];
1047 status = rxdp->wb.upper.status_error;
1048 /* check to make sure there is at least 1 packet to receive */
1049 if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1053 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1054 * reference packets that are ready to be received.
1056 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1057 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1059 /* Read desc statuses backwards to avoid race condition */
1060 for (j = LOOK_AHEAD-1; j >= 0; --j)
1061 s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1063 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1064 pkt_info[j] = rxdp[j].wb.lower.lo_dword.
1067 /* Compute how many status bits were set */
1069 for (j = 0; j < LOOK_AHEAD; ++j)
1070 nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1074 /* Translate descriptor info to mbuf format */
1075 for (j = 0; j < nb_dd; ++j) {
1077 pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1079 mb->data_len = pkt_len;
1080 mb->pkt_len = pkt_len;
1081 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1083 /* convert descriptor fields to rte mbuf flags */
1084 pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1085 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1087 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1088 mb->ol_flags = pkt_flags;
1090 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info[j]);
1092 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1093 mb->hash.rss = rte_le_to_cpu_32(
1094 rxdp[j].wb.lower.hi_dword.rss);
1095 else if (pkt_flags & PKT_RX_FDIR) {
1096 mb->hash.fdir.hash = rte_le_to_cpu_16(
1097 rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1098 IXGBE_ATR_HASH_MASK;
1099 mb->hash.fdir.id = rte_le_to_cpu_16(
1100 rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1104 /* Move mbuf pointers from the S/W ring to the stage */
1105 for (j = 0; j < LOOK_AHEAD; ++j) {
1106 rxq->rx_stage[i + j] = rxep[j].mbuf;
1109 /* stop scanning if not all LOOK_AHEAD descriptors in this group were done */
1110 if (nb_dd != LOOK_AHEAD)
1114 /* clear software ring entries so we can cleanup correctly */
1115 for (i = 0; i < nb_rx; ++i) {
1116 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1124 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1126 volatile union ixgbe_adv_rx_desc *rxdp;
1127 struct ixgbe_rx_entry *rxep;
1128 struct rte_mbuf *mb;
1133 /* allocate buffers in bulk directly into the S/W ring */
1134 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1135 rxep = &rxq->sw_ring[alloc_idx];
1136 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1137 rxq->rx_free_thresh);
1138 if (unlikely(diag != 0))
1141 rxdp = &rxq->rx_ring[alloc_idx];
1142 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1143 /* populate the static rte mbuf fields */
1148 mb->port = rxq->port_id;
1151 rte_mbuf_refcnt_set(mb, 1);
1152 mb->data_off = RTE_PKTMBUF_HEADROOM;
1154 /* populate the descriptors */
1155 dma_addr = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb));
1156 rxdp[i].read.hdr_addr = 0;
1157 rxdp[i].read.pkt_addr = dma_addr;
1160 /* update state of internal queue structure */
1161 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1162 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1163 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1169 static inline uint16_t
1170 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1173 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1176 /* how many packets are ready to return? */
1177 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1179 /* copy mbuf pointers to the application's packet list */
1180 for (i = 0; i < nb_pkts; ++i)
1181 rx_pkts[i] = stage[i];
1183 /* update internal queue state */
1184 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1185 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1190 static inline uint16_t
1191 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1194 struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1197 /* Any previously recv'd pkts will be returned from the Rx stage */
1198 if (rxq->rx_nb_avail)
1199 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1201 /* Scan the H/W ring for packets to receive */
1202 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1204 /* update internal queue state */
1205 rxq->rx_next_avail = 0;
1206 rxq->rx_nb_avail = nb_rx;
1207 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1209 /* if required, allocate new buffers to replenish descriptors */
1210 if (rxq->rx_tail > rxq->rx_free_trigger) {
1211 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1213 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1215 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1216 "queue_id=%u", (unsigned) rxq->port_id,
1217 (unsigned) rxq->queue_id);
1219 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1220 rxq->rx_free_thresh;
1223 * Need to rewind any previous receives if we cannot
1224 * allocate new buffers to replenish the old ones.
1226 rxq->rx_nb_avail = 0;
1227 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1228 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1229 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1234 /* update tail pointer */
1236 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1239 if (rxq->rx_tail >= rxq->nb_rx_desc)
1242 /* received any packets this loop? */
1243 if (rxq->rx_nb_avail)
1244 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1249 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1251 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1256 if (unlikely(nb_pkts == 0))
1259 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1260 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1262 /* request is relatively large, chunk it up */
1266 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1267 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1268 nb_rx = (uint16_t)(nb_rx + ret);
1269 nb_pkts = (uint16_t)(nb_pkts - ret);
1278 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1281 struct ixgbe_rx_queue *rxq;
1282 volatile union ixgbe_adv_rx_desc *rx_ring;
1283 volatile union ixgbe_adv_rx_desc *rxdp;
1284 struct ixgbe_rx_entry *sw_ring;
1285 struct ixgbe_rx_entry *rxe;
1286 struct rte_mbuf *rxm;
1287 struct rte_mbuf *nmb;
1288 union ixgbe_adv_rx_desc rxd;
1301 rx_id = rxq->rx_tail;
1302 rx_ring = rxq->rx_ring;
1303 sw_ring = rxq->sw_ring;
1304 while (nb_rx < nb_pkts) {
1306 * The order of operations here is important as the DD status
1307 * bit must not be read after any other descriptor fields.
1308 * rx_ring and rxdp are pointing to volatile data so the order
1309 * of accesses cannot be reordered by the compiler. If they were
1310 * not volatile, they could be reordered which could lead to
1311 * using invalid descriptor fields when read from rxd.
1313 rxdp = &rx_ring[rx_id];
1314 staterr = rxdp->wb.upper.status_error;
1315 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1322 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1323 * is likely to be invalid and to be dropped by the various
1324 * validation checks performed by the network stack.
1326 * Allocate a new mbuf to replenish the RX ring descriptor.
1327 * If the allocation fails:
1328 * - arrange for that RX descriptor to be the first one
1329 * being parsed the next time the receive function is
1330 * invoked [on the same queue].
1332 * - Stop parsing the RX ring and return immediately.
1334 * This policy does not drop the packet received in the RX
1335 * descriptor for which the allocation of a new mbuf failed.
1336 * Thus, it allows that packet to be retrieved later, once
1337 * mbufs have been freed in the meantime.
1338 * As a side effect, holding RX descriptors instead of
1339 * systematically giving them back to the NIC may lead to
1340 * RX ring exhaustion situations.
1341 * However, the NIC can gracefully prevent such situations
1342 * from happening by sending specific "back-pressure" flow control
1343 * frames to its peer(s).
1345 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1346 "ext_err_stat=0x%08x pkt_len=%u",
1347 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1348 (unsigned) rx_id, (unsigned) staterr,
1349 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1351 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1353 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1354 "queue_id=%u", (unsigned) rxq->port_id,
1355 (unsigned) rxq->queue_id);
1356 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1361 rxe = &sw_ring[rx_id];
1363 if (rx_id == rxq->nb_rx_desc)
1366 /* Prefetch next mbuf while processing current one. */
1367 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1370 * When next RX descriptor is on a cache-line boundary,
1371 * prefetch the next 4 RX descriptors and the next 8 pointers
1374 if ((rx_id & 0x3) == 0) {
1375 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1376 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1382 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1383 rxdp->read.hdr_addr = 0;
1384 rxdp->read.pkt_addr = dma_addr;
1387 * Initialize the returned mbuf.
1388 * 1) setup generic mbuf fields:
1389 * - number of segments,
1392 * - RX port identifier.
1393 * 2) integrate hardware offload data, if any:
1394 * - RSS flag & hash,
1395 * - IP checksum flag,
1396 * - VLAN TCI, if any,
1399 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1401 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1402 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1405 rxm->pkt_len = pkt_len;
1406 rxm->data_len = pkt_len;
1407 rxm->port = rxq->port_id;
1409 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.hs_rss.
1411 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1412 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1414 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1415 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1416 pkt_flags = pkt_flags |
1417 ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1418 rxm->ol_flags = pkt_flags;
1419 rxm->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1421 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1422 rxm->hash.rss = rte_le_to_cpu_32(
1423 rxd.wb.lower.hi_dword.rss);
1424 else if (pkt_flags & PKT_RX_FDIR) {
1425 rxm->hash.fdir.hash = rte_le_to_cpu_16(
1426 rxd.wb.lower.hi_dword.csum_ip.csum) &
1427 IXGBE_ATR_HASH_MASK;
1428 rxm->hash.fdir.id = rte_le_to_cpu_16(
1429 rxd.wb.lower.hi_dword.csum_ip.ip_id);
1432 * Store the mbuf address into the next entry of the array
1433 * of returned packets.
1435 rx_pkts[nb_rx++] = rxm;
1437 rxq->rx_tail = rx_id;
1440 * If the number of free RX descriptors is greater than the RX free
1441 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1443 * Update the RDT with the value of the last processed RX descriptor
1444 * minus 1, to guarantee that the RDT register is never equal to the
1445 * RDH register, which creates a "full" ring situation from the
1446 * hardware point of view...
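 * Illustrative example: if rx_id has wrapped to 0, the RDT is written
 * with nb_rx_desc - 1, one position behind, so it can never equal RDH.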
1448 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1449 if (nb_hold > rxq->rx_free_thresh) {
1450 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1451 "nb_hold=%u nb_rx=%u",
1452 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1453 (unsigned) rx_id, (unsigned) nb_hold,
1455 rx_id = (uint16_t) ((rx_id == 0) ?
1456 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1457 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1460 rxq->nb_rx_hold = nb_hold;
1465 * Detect an RSC descriptor.
1467 static inline uint32_t
1468 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1470 return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1471 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1475 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1477 * Fill the following info in the HEAD buffer of the Rx cluster:
1478 * - RX port identifier
1479 * - hardware offload data, if any:
1481 * - IP checksum flag
1482 * - VLAN TCI, if any
1484 * @head HEAD of the packet cluster
1485 * @desc HW descriptor to get data from
1486 * @port_id Port ID of the Rx queue
1489 ixgbe_fill_cluster_head_buf(
1490 struct rte_mbuf *head,
1491 union ixgbe_adv_rx_desc *desc,
1498 head->port = port_id;
1500 /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1501 * set in the pkt_flags field.
1503 head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1504 pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.hs_rss.pkt_info);
1505 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1506 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1507 pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1508 head->ol_flags = pkt_flags;
1509 head->packet_type = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info);
1511 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1512 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1513 else if (pkt_flags & PKT_RX_FDIR) {
1514 head->hash.fdir.hash =
1515 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1516 & IXGBE_ATR_HASH_MASK;
1517 head->hash.fdir.id =
1518 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1523 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1525 * @rx_queue Rx queue handle
1526 * @rx_pkts table of received packets
1527 * @nb_pkts size of rx_pkts table
1528 * @bulk_alloc if TRUE bulk allocation is used for HW ring refilling
1530 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1531 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1533 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1534 * 1) When non-EOP RSC completion arrives:
1535 * a) Update the HEAD of the current RSC aggregation cluster with the new
1536 * segment's data length.
1537 * b) Set the "next" pointer of the current segment to point to the segment
1538 * at the NEXTP index.
1539 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1540 * in the sw_rsc_ring.
1541 * 2) When EOP arrives we just update the cluster's total length and offload
1542 * flags and deliver the cluster up to the upper layers. In our case - put it
1543 * in the rx_pkts table.
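 * Illustrative walk-through (example indices only): for a 3-segment RSC
 * cluster completing at descriptors 10, 25 and 31, the pointer to the
 * HEAD mbuf is forwarded through sw_sc_ring[25] and then sw_sc_ring[31];
 * only when the EOP completion at 31 arrives is the assembled cluster
 * placed in rx_pkts.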
1545 * Returns the number of received packets/clusters (according to the "bulk
1546 * receive" interface).
1548 static inline uint16_t
1549 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1552 struct ixgbe_rx_queue *rxq = rx_queue;
1553 volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1554 struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1555 struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1556 uint16_t rx_id = rxq->rx_tail;
1558 uint16_t nb_hold = rxq->nb_rx_hold;
1559 uint16_t prev_id = rxq->rx_tail;
1561 while (nb_rx < nb_pkts) {
1563 struct ixgbe_rx_entry *rxe;
1564 struct ixgbe_scattered_rx_entry *sc_entry;
1565 struct ixgbe_scattered_rx_entry *next_sc_entry;
1566 struct ixgbe_rx_entry *next_rxe;
1567 struct rte_mbuf *first_seg;
1568 struct rte_mbuf *rxm;
1569 struct rte_mbuf *nmb;
1570 union ixgbe_adv_rx_desc rxd;
1573 volatile union ixgbe_adv_rx_desc *rxdp;
1578 * The code in this whole file uses the volatile pointer to
1579 * ensure the read ordering of the status and the rest of the
1580 * descriptor fields (on the compiler level only!!!). This is so
1581 * UGLY - why not just use the compiler barrier instead? DPDK
1582 * even has the rte_compiler_barrier() for that.
1584 * But most importantly this is just wrong because this doesn't
1585 * ensure memory ordering in a general case at all. For
1586 * instance, DPDK is supposed to work on Power CPUs where
1587 * compiler barrier may just not be enough!
1589 * I tried to write only this function properly to have a
1590 * starting point (as a part of an LRO/RSC series) but the
1591 * compiler cursed at me when I tried to cast away the
1592 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1593 * keeping it the way it is for now.
1595 * The code in this file is broken in so many other places and
1596 * will just not work on a big endian CPU anyway therefore the
1597 * lines below will have to be revisited together with the rest
1601 * - Get rid of "volatile" crap and let the compiler do its
1603 * - Use the proper memory barrier (rte_rmb()) to ensure the
1604 * memory ordering below.
1606 rxdp = &rx_ring[rx_id];
1607 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1609 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1614 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1615 "staterr=0x%x data_len=%u",
1616 rxq->port_id, rxq->queue_id, rx_id, staterr,
1617 rte_le_to_cpu_16(rxd.wb.upper.length));
1620 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1622 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1623 "port_id=%u queue_id=%u",
1624 rxq->port_id, rxq->queue_id);
1626 rte_eth_devices[rxq->port_id].data->
1627 rx_mbuf_alloc_failed++;
1631 else if (nb_hold > rxq->rx_free_thresh) {
1632 uint16_t next_rdt = rxq->rx_free_trigger;
1634 if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1636 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1638 nb_hold -= rxq->rx_free_thresh;
1640 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1641 "port_id=%u queue_id=%u",
1642 rxq->port_id, rxq->queue_id);
1644 rte_eth_devices[rxq->port_id].data->
1645 rx_mbuf_alloc_failed++;
1651 rxe = &sw_ring[rx_id];
1652 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1654 next_id = rx_id + 1;
1655 if (next_id == rxq->nb_rx_desc)
1658 /* Prefetch next mbuf while processing current one. */
1659 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1662 * When next RX descriptor is on a cache-line boundary,
1663 * prefetch the next 4 RX descriptors and the next 4 pointers
1666 if ((next_id & 0x3) == 0) {
1667 rte_ixgbe_prefetch(&rx_ring[next_id]);
1668 rte_ixgbe_prefetch(&sw_ring[next_id]);
1675 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1677 * Update RX descriptor with the physical address of the
1678 * new data buffer of the newly allocated mbuf.
1682 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1683 rxdp->read.hdr_addr = 0;
1684 rxdp->read.pkt_addr = dma;
1689 * Set data length & data buffer address of mbuf.
1691 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1692 rxm->data_len = data_len;
1697 * Get next descriptor index:
1698 * - For RSC it's in the NEXTP field.
1699 * - For a scattered packet - it's just a following
1702 if (ixgbe_rsc_count(&rxd))
1704 (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1705 IXGBE_RXDADV_NEXTP_SHIFT;
1709 next_sc_entry = &sw_sc_ring[nextp_id];
1710 next_rxe = &sw_ring[nextp_id];
1711 rte_ixgbe_prefetch(next_rxe);
1714 sc_entry = &sw_sc_ring[rx_id];
1715 first_seg = sc_entry->fbuf;
1716 sc_entry->fbuf = NULL;
1719 * If this is the first buffer of the received packet,
1720 * set the pointer to the first mbuf of the packet and
1721 * initialize its context.
1722 * Otherwise, update the total length and the number of segments
1723 * of the current scattered packet, and update the pointer to
1724 * the last mbuf of the current packet.
1726 if (first_seg == NULL) {
1728 first_seg->pkt_len = data_len;
1729 first_seg->nb_segs = 1;
1731 first_seg->pkt_len += data_len;
1732 first_seg->nb_segs++;
1739 * If this is not the last buffer of the received packet, update
1740 * the pointer to the first mbuf at the NEXTP entry in the
1741 * sw_sc_ring and continue to parse the RX ring.
1744 rxm->next = next_rxe->mbuf;
1745 next_sc_entry->fbuf = first_seg;
1750 * This is the last buffer of the received packet - return
1751 * the current cluster to the user.
1755 /* Initialize the first mbuf of the returned packet */
1756 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq->port_id,
1760 * Deal with the case when HW CRC strip is disabled.
1761 * That can't happen when LRO is enabled, but still could
1762 * happen for scattered RX mode.
1764 first_seg->pkt_len -= rxq->crc_len;
1765 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1766 struct rte_mbuf *lp;
1768 for (lp = first_seg; lp->next != rxm; lp = lp->next)
1771 first_seg->nb_segs--;
1772 lp->data_len -= rxq->crc_len - rxm->data_len;
1774 rte_pktmbuf_free_seg(rxm);
1776 rxm->data_len -= rxq->crc_len;
1778 /* Prefetch data of first segment, if configured to do so. */
1779 rte_packet_prefetch((char *)first_seg->buf_addr +
1780 first_seg->data_off);
1783 * Store the mbuf address into the next entry of the array
1784 * of returned packets.
1786 rx_pkts[nb_rx++] = first_seg;
1790 * Record index of the next RX descriptor to probe.
1792 rxq->rx_tail = rx_id;
1795 * If the number of free RX descriptors is greater than the RX free
1796 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1798 * Update the RDT with the value of the last processed RX descriptor
1799 * minus 1, to guarantee that the RDT register is never equal to the
1800 * RDH register, which creates a "full" ring situation from the
1801 * hardware point of view...
1803 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1804 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1805 "nb_hold=%u nb_rx=%u",
1806 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1809 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
1813 rxq->nb_rx_hold = nb_hold;
1818 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1821 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1825 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1828 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1831 /*********************************************************************
1833 * Queue management functions
1835 **********************************************************************/
1838 * Create memzone for HW rings. malloc can't be used as the physical address is
1839 * needed. If the memzone is already created, then this function returns a ptr
1842 static const struct rte_memzone * __attribute__((cold))
1843 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1844 uint16_t queue_id, uint32_t ring_size, int socket_id)
1846 char z_name[RTE_MEMZONE_NAMESIZE];
1847 const struct rte_memzone *mz;
1849 snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1850 dev->driver->pci_drv.name, ring_name,
1851 dev->data->port_id, queue_id);
1853 mz = rte_memzone_lookup(z_name);
1857 #ifdef RTE_LIBRTE_XEN_DOM0
1858 return rte_memzone_reserve_bounded(z_name, ring_size,
1859 socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
1861 return rte_memzone_reserve_aligned(z_name, ring_size,
1862 socket_id, 0, IXGBE_ALIGN);
1866 static void __attribute__((cold))
1867 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
1871 if (txq->sw_ring != NULL) {
1872 for (i = 0; i < txq->nb_tx_desc; i++) {
1873 if (txq->sw_ring[i].mbuf != NULL) {
1874 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1875 txq->sw_ring[i].mbuf = NULL;
1881 static void __attribute__((cold))
1882 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
1885 txq->sw_ring != NULL)
1886 rte_free(txq->sw_ring);
1889 static void __attribute__((cold))
1890 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
1892 if (txq != NULL && txq->ops != NULL) {
1893 txq->ops->release_mbufs(txq);
1894 txq->ops->free_swring(txq);
1899 void __attribute__((cold))
1900 ixgbe_dev_tx_queue_release(void *txq)
1902 ixgbe_tx_queue_release(txq);
1905 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
1906 static void __attribute__((cold))
1907 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
1909 static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
1910 struct ixgbe_tx_entry *txe = txq->sw_ring;
1913 /* Zero out HW ring memory */
1914 for (i = 0; i < txq->nb_tx_desc; i++) {
1915 txq->tx_ring[i] = zeroed_desc;
1918 /* Initialize SW ring entries */
1919 prev = (uint16_t) (txq->nb_tx_desc - 1);
1920 for (i = 0; i < txq->nb_tx_desc; i++) {
1921 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1922 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
1925 txe[prev].next_id = i;
1929 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1930 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1933 txq->nb_tx_used = 0;
1935 * Always allow 1 descriptor to be un-allocated to avoid
1936 * a H/W race condition
1938 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1939 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1941 memset((void*)&txq->ctx_cache, 0,
1942 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1945 static const struct ixgbe_txq_ops def_txq_ops = {
1946 .release_mbufs = ixgbe_tx_queue_release_mbufs,
1947 .free_swring = ixgbe_tx_free_swring,
1948 .reset = ixgbe_reset_tx_queue,
1951 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1952 * the queue parameters. Used in tx_queue_setup by primary process and then
1953 * in dev_init by secondary process when attaching to an existing ethdev.
1955 void __attribute__((cold))
1956 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
1958 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1959 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1960 && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1961 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1962 #ifdef RTE_IXGBE_INC_VECTOR
1963 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1964 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1965 ixgbe_txq_vec_setup(txq) == 0)) {
1966 PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
1967 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1970 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1972 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
1974 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1975 (unsigned long)txq->txq_flags,
1976 (unsigned long)IXGBE_SIMPLE_FLAGS);
1978 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1979 (unsigned long)txq->tx_rs_thresh,
1980 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1981 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1985 int __attribute__((cold))
1986 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1989 unsigned int socket_id,
1990 const struct rte_eth_txconf *tx_conf)
1992 const struct rte_memzone *tz;
1993 struct ixgbe_tx_queue *txq;
1994 struct ixgbe_hw *hw;
1995 uint16_t tx_rs_thresh, tx_free_thresh;
1997 PMD_INIT_FUNC_TRACE();
1998 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2001 * Validate number of transmit descriptors.
2002 * It must not exceed the hardware maximum, and must be a multiple
2005 if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2006 (nb_desc > IXGBE_MAX_RING_DESC) ||
2007 (nb_desc < IXGBE_MIN_RING_DESC)) {
2012 * The following two parameters control the setting of the RS bit on
2013 * transmit descriptors.
2014 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2015 * descriptors have been used.
2016 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2017 * descriptors are used or if the number of descriptors required
2018 * to transmit a packet is greater than the number of free TX
2020 * The following constraints must be satisfied:
2021 * tx_rs_thresh must be greater than 0.
2022 * tx_rs_thresh must be less than the size of the ring minus 2.
2023 * tx_rs_thresh must be less than or equal to tx_free_thresh.
2024 * tx_rs_thresh must be a divisor of the ring size.
2025 * tx_free_thresh must be greater than 0.
2026 * tx_free_thresh must be less than the size of the ring minus 3.
2027 * One descriptor in the TX ring is used as a sentinel to avoid a
2028 * H/W race condition, hence the maximum threshold constraints.
2029 * When set to zero use default values.
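 * Example of a valid configuration (illustrative values only):
 * nb_desc = 512, tx_rs_thresh = 32 (a divisor of 512) and
 * tx_free_thresh = 32 satisfy all of the constraints above.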
2031 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2032 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2033 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2034 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2035 if (tx_rs_thresh >= (nb_desc - 2)) {
2036 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2037 "of TX descriptors minus 2. (tx_rs_thresh=%u "
2038 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2039 (int)dev->data->port_id, (int)queue_idx);
2042 if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2043 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2044 "(tx_rs_thresh=%u port=%d queue=%d)",
2045 DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2046 (int)dev->data->port_id, (int)queue_idx);
2049 if (tx_free_thresh >= (nb_desc - 3)) {
2050 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2052 "TX descriptors minus 3. (tx_free_thresh=%u "
2053 "port=%d queue=%d)",
2054 (unsigned int)tx_free_thresh,
2055 (int)dev->data->port_id, (int)queue_idx);
2058 if (tx_rs_thresh > tx_free_thresh) {
2059 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2060 "tx_free_thresh. (tx_free_thresh=%u "
2061 "tx_rs_thresh=%u port=%d queue=%d)",
2062 (unsigned int)tx_free_thresh,
2063 (unsigned int)tx_rs_thresh,
2064 (int)dev->data->port_id,
2068 if ((nb_desc % tx_rs_thresh) != 0) {
2069 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2070 "number of TX descriptors. (tx_rs_thresh=%u "
2071 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2072 (int)dev->data->port_id, (int)queue_idx);
2077 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2078 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2079 * by the NIC and all descriptors are written back after the NIC
2080 * accumulates WTHRESH descriptors.
2082 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2083 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2084 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2085 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2086 (int)dev->data->port_id, (int)queue_idx);
2090 /* Free memory prior to re-allocation if needed... */
2091 if (dev->data->tx_queues[queue_idx] != NULL) {
2092 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2093 dev->data->tx_queues[queue_idx] = NULL;
2096 /* First allocate the tx queue data structure */
2097 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2098 RTE_CACHE_LINE_SIZE, socket_id);
2103 * Allocate TX ring hardware descriptors. A memzone large enough to
2104 * handle the maximum ring size is allocated in order to allow for
2105 * resizing in later calls to the queue setup function.
2107 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
2108 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2111 ixgbe_tx_queue_release(txq);
2115 txq->nb_tx_desc = nb_desc;
2116 txq->tx_rs_thresh = tx_rs_thresh;
2117 txq->tx_free_thresh = tx_free_thresh;
2118 txq->pthresh = tx_conf->tx_thresh.pthresh;
2119 txq->hthresh = tx_conf->tx_thresh.hthresh;
2120 txq->wthresh = tx_conf->tx_thresh.wthresh;
2121 txq->queue_id = queue_idx;
2122 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2123 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2124 txq->port_id = dev->data->port_id;
2125 txq->txq_flags = tx_conf->txq_flags;
2126 txq->ops = &def_txq_ops;
2127 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2130 * Set the TDT (tail) register address: use VFTDT when the device is a virtual function
2132 if (hw->mac.type == ixgbe_mac_82599_vf ||
2133 hw->mac.type == ixgbe_mac_X540_vf ||
2134 hw->mac.type == ixgbe_mac_X550_vf ||
2135 hw->mac.type == ixgbe_mac_X550EM_x_vf)
2136 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2138 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2139 #ifndef RTE_LIBRTE_XEN_DOM0
2140 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
2142 txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2144 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2146 /* Allocate software ring */
2147 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2148 sizeof(struct ixgbe_tx_entry) * nb_desc,
2149 RTE_CACHE_LINE_SIZE, socket_id);
2150 if (txq->sw_ring == NULL) {
2151 ixgbe_tx_queue_release(txq);
2154 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2155 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2157 /* set up vector or scalar TX function as appropriate */
2158 ixgbe_set_tx_function(dev, txq);
2160 txq->ops->reset(txq);
2162 dev->data->tx_queues[queue_idx] = txq;
2169 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2171 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2172 * in the sw_rsc_ring is not set to NULL but rather points to the next
2173 * mbuf of this RSC aggregation (that has not been completed yet and still
2174 * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2175 * just free the first "nb_segs" segments of the cluster explicitly by
2176 * calling rte_pktmbuf_free_seg() on each of them.
2178 * @m scattered cluster head
2180 static void __attribute__((cold))
2181 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2183 uint8_t i, nb_segs = m->nb_segs;
2184 struct rte_mbuf *next_seg;
2186 for (i = 0; i < nb_segs; i++) {
2188 rte_pktmbuf_free_seg(m);
2193 static void __attribute__((cold))
2194 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2198 #ifdef RTE_IXGBE_INC_VECTOR
2199 /* SSE Vector driver has a different way of releasing mbufs. */
2200 if (rxq->rx_using_sse) {
2201 ixgbe_rx_queue_release_mbufs_vec(rxq);
2206 if (rxq->sw_ring != NULL) {
2207 for (i = 0; i < rxq->nb_rx_desc; i++) {
2208 if (rxq->sw_ring[i].mbuf != NULL) {
2209 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2210 rxq->sw_ring[i].mbuf = NULL;
2213 if (rxq->rx_nb_avail) {
2214 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2215 struct rte_mbuf *mb;
2216 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2217 rte_pktmbuf_free_seg(mb);
2219 rxq->rx_nb_avail = 0;
2223 if (rxq->sw_sc_ring)
2224 for (i = 0; i < rxq->nb_rx_desc; i++)
2225 if (rxq->sw_sc_ring[i].fbuf) {
2226 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2227 rxq->sw_sc_ring[i].fbuf = NULL;
2231 static void __attribute__((cold))
2232 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2235 ixgbe_rx_queue_release_mbufs(rxq);
2236 rte_free(rxq->sw_ring);
2237 rte_free(rxq->sw_sc_ring);
2242 void __attribute__((cold))
2243 ixgbe_dev_rx_queue_release(void *rxq)
2245 ixgbe_rx_queue_release(rxq);
2249 * Check if Rx Burst Bulk Alloc function can be used.
2251 * 0: the preconditions are satisfied and the bulk allocation function
2253 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2254 * function must be used.
2256 static inline int __attribute__((cold))
2257 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2262 * Make sure the following pre-conditions are satisfied:
2263 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2264 * rxq->rx_free_thresh < rxq->nb_rx_desc
2265 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2266 * rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
2267 * Scattered packets are not supported. This should be checked
2268 * outside of this function.
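 *
 * Editorial example (illustrative only, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST == 32 and IXGBE_MAX_RING_DESC == 4096):
 * a queue with nb_rx_desc = 128 and rx_free_thresh = 32 passes every
 * check below (32 >= 32, 32 < 128, 128 % 32 == 0, 128 < 4096 - 32),
 * so the bulk allocation receive path may be selected for it.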
2270 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2271 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2272 "rxq->rx_free_thresh=%d, "
2273 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2274 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2276 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2277 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2278 "rxq->rx_free_thresh=%d, "
2279 "rxq->nb_rx_desc=%d",
2280 rxq->rx_free_thresh, rxq->nb_rx_desc);
2282 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2283 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2284 "rxq->nb_rx_desc=%d, "
2285 "rxq->rx_free_thresh=%d",
2286 rxq->nb_rx_desc, rxq->rx_free_thresh);
2288 } else if (!(rxq->nb_rx_desc <
2289 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2290 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2291 "rxq->nb_rx_desc=%d, "
2292 "IXGBE_MAX_RING_DESC=%d, "
2293 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2294 rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2295 RTE_PMD_IXGBE_RX_MAX_BURST);
2302 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2303 static void __attribute__((cold))
2304 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2306 static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2308 uint16_t len = rxq->nb_rx_desc;
2311 * By default, the Rx queue setup function allocates enough memory for
2312 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2313 * extra memory at the end of the descriptor ring to be zero'd out. A
2314 * pre-condition for using the Rx burst bulk alloc function is that the
2315 * number of descriptors is less than or equal to
2316 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2317 * constraints here to see if we need to zero out memory after the end
2318 * of the H/W descriptor ring.
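 *
 * Editorial example (illustrative only, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST == 32): with bulk alloc allowed, a queue
 * of 128 descriptors zeroes 160 descriptor slots below, and the 32
 * extra sw_ring entries are pointed at the local fake_mbuf so that
 * look-ahead reads past the real ring remain harmless.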
2320 if (adapter->rx_bulk_alloc_allowed)
2321 /* zero out extra memory */
2322 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2325 * Zero out HW ring memory. Zero out extra memory at the end of
2326 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2327 * reads extra memory as zeros.
2329 for (i = 0; i < len; i++) {
2330 rxq->rx_ring[i] = zeroed_desc;
2334 * initialize extra software ring entries. Space for these extra
2335 * entries is always allocated
2337 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2338 for (i = rxq->nb_rx_desc; i < len; ++i) {
2339 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2342 rxq->rx_nb_avail = 0;
2343 rxq->rx_next_avail = 0;
2344 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2346 rxq->nb_rx_hold = 0;
2347 rxq->pkt_first_seg = NULL;
2348 rxq->pkt_last_seg = NULL;
2350 #ifdef RTE_IXGBE_INC_VECTOR
2351 rxq->rxrearm_start = 0;
2352 rxq->rxrearm_nb = 0;
2356 int __attribute__((cold))
2357 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2360 unsigned int socket_id,
2361 const struct rte_eth_rxconf *rx_conf,
2362 struct rte_mempool *mp)
2364 const struct rte_memzone *rz;
2365 struct ixgbe_rx_queue *rxq;
2366 struct ixgbe_hw *hw;
2368 struct ixgbe_adapter *adapter =
2369 (struct ixgbe_adapter *)dev->data->dev_private;
2371 PMD_INIT_FUNC_TRACE();
2372 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2375 * Validate number of receive descriptors.
2376 * It must not exceed the hardware maximum, and must be a multiple
2379 if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2380 (nb_desc > IXGBE_MAX_RING_DESC) ||
2381 (nb_desc < IXGBE_MIN_RING_DESC)) {
2385 /* Free memory prior to re-allocation if needed... */
2386 if (dev->data->rx_queues[queue_idx] != NULL) {
2387 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2388 dev->data->rx_queues[queue_idx] = NULL;
2391 /* First allocate the rx queue data structure */
2392 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2393 RTE_CACHE_LINE_SIZE, socket_id);
2397 rxq->nb_rx_desc = nb_desc;
2398 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2399 rxq->queue_id = queue_idx;
2400 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2401 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2402 rxq->port_id = dev->data->port_id;
2403 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2405 rxq->drop_en = rx_conf->rx_drop_en;
2406 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2409 * Allocate RX ring hardware descriptors. A memzone large enough to
2410 * handle the maximum ring size is allocated in order to allow for
2411 * resizing in later calls to the queue setup function.
2413 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2414 RX_RING_SZ, socket_id);
2416 ixgbe_rx_queue_release(rxq);
2421 * Zero init all the descriptors in the ring.
2423 memset(rz->addr, 0, RX_RING_SZ);
2426 * Set the RDT/RDH register addresses: use VFRDT/VFRDH when the device is a Virtual Function
2428 if (hw->mac.type == ixgbe_mac_82599_vf ||
2429 hw->mac.type == ixgbe_mac_X540_vf ||
2430 hw->mac.type == ixgbe_mac_X550_vf ||
2431 hw->mac.type == ixgbe_mac_X550EM_x_vf) {
2433 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2435 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2439 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2441 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2443 #ifndef RTE_LIBRTE_XEN_DOM0
2444 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2446 rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2448 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2451 * Certain constraints must be met in order to use the bulk buffer
2452 * allocation Rx burst function. If any Rx queue doesn't meet them,
2453 * the feature must be disabled for the whole port.
2455 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2456 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2457 "preconditions - canceling the feature for "
2458 "the whole port[%d]",
2459 rxq->queue_id, rxq->port_id);
2460 adapter->rx_bulk_alloc_allowed = false;
2464 * Allocate software ring. Allow for space at the end of the
2465 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2466 * function does not access an invalid memory region.
2469 if (adapter->rx_bulk_alloc_allowed)
2470 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2472 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2473 sizeof(struct ixgbe_rx_entry) * len,
2474 RTE_CACHE_LINE_SIZE, socket_id);
2475 if (!rxq->sw_ring) {
2476 ixgbe_rx_queue_release(rxq);
2481 * Always allocate even if it's not going to be needed in order to
2482 * simplify the code.
2484 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2485 * be requested in ixgbe_dev_rx_init(), which is called later from
2489 rte_zmalloc_socket("rxq->sw_sc_ring",
2490 sizeof(struct ixgbe_scattered_rx_entry) * len,
2491 RTE_CACHE_LINE_SIZE, socket_id);
2492 if (!rxq->sw_sc_ring) {
2493 ixgbe_rx_queue_release(rxq);
2497 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2498 "dma_addr=0x%"PRIx64,
2499 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2500 rxq->rx_ring_phys_addr);
2502 if (!rte_is_power_of_2(nb_desc)) {
2503 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2504 "preconditions - canceling the feature for "
2505 "the whole port[%d]",
2506 rxq->queue_id, rxq->port_id);
2507 adapter->rx_vec_allowed = false;
2509 ixgbe_rxq_vec_setup(rxq);
2511 dev->data->rx_queues[queue_idx] = rxq;
2513 ixgbe_reset_rx_queue(adapter, rxq);
2519 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2521 #define IXGBE_RXQ_SCAN_INTERVAL 4
2522 volatile union ixgbe_adv_rx_desc *rxdp;
2523 struct ixgbe_rx_queue *rxq;
2526 if (rx_queue_id >= dev->data->nb_rx_queues) {
2527 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2531 rxq = dev->data->rx_queues[rx_queue_id];
2532 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2534 while ((desc < rxq->nb_rx_desc) &&
2535 (rxdp->wb.upper.status_error &
2536 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2537 desc += IXGBE_RXQ_SCAN_INTERVAL;
2538 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2539 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2540 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2541 desc - rxq->nb_rx_desc]);
2548 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2550 volatile union ixgbe_adv_rx_desc *rxdp;
2551 struct ixgbe_rx_queue *rxq = rx_queue;
2554 if (unlikely(offset >= rxq->nb_rx_desc))
2556 desc = rxq->rx_tail + offset;
2557 if (desc >= rxq->nb_rx_desc)
2558 desc -= rxq->nb_rx_desc;
2560 rxdp = &rxq->rx_ring[desc];
2561 return !!(rxdp->wb.upper.status_error &
2562 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2565 void __attribute__((cold))
2566 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2569 struct ixgbe_adapter *adapter =
2570 (struct ixgbe_adapter *)dev->data->dev_private;
2572 PMD_INIT_FUNC_TRACE();
2574 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2575 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2577 txq->ops->release_mbufs(txq);
2578 txq->ops->reset(txq);
2582 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2583 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2585 ixgbe_rx_queue_release_mbufs(rxq);
2586 ixgbe_reset_rx_queue(adapter, rxq);
2592 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2596 PMD_INIT_FUNC_TRACE();
2598 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2599 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2600 dev->data->rx_queues[i] = NULL;
2602 dev->data->nb_rx_queues = 0;
2604 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2605 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2606 dev->data->tx_queues[i] = NULL;
2608 dev->data->nb_tx_queues = 0;
2611 /*********************************************************************
2613 * Device RX/TX init functions
2615 **********************************************************************/
2618 * Receive Side Scaling (RSS)
2619 * See section 7.1.2.8 in the following document:
2620 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2623 * The source and destination IP addresses of the IP header and the source
2624 * and destination ports of TCP/UDP headers, if any, of received packets are
2625 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2626 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2627 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2628 * RSS output index which is used as the RX queue index where to store the
2630 * The following output is supplied in the RX write-back descriptor:
2631 * - 32-bit result of the Microsoft RSS hash function,
2632 * - 4-bit RSS type field.
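 *
 * Editorial sketch of the resulting queue selection (illustrative only):
 *   reta_idx = rss_hash & 0x7F;     7 LSBs index the 128-entry RETA
 *   rx_queue = RETA[reta_idx];      the entry supplies the RX queue index
 * e.g. a packet whose hash is 0x1234ABCD is steered via RETA[0x4D].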
2636 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2637 * Used as the default key.
2639 static uint8_t rss_intel_key[40] = {
2640 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2641 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2642 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2643 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2644 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2648 ixgbe_rss_disable(struct rte_eth_dev *dev)
2650 struct ixgbe_hw *hw;
2654 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2655 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2656 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2657 mrqc &= ~IXGBE_MRQC_RSSEN;
2658 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2662 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2672 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2673 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2675 hash_key = rss_conf->rss_key;
2676 if (hash_key != NULL) {
2677 /* Fill in RSS hash key */
2678 for (i = 0; i < 10; i++) {
2679 rss_key = hash_key[(i * 4)];
2680 rss_key |= hash_key[(i * 4) + 1] << 8;
2681 rss_key |= hash_key[(i * 4) + 2] << 16;
2682 rss_key |= hash_key[(i * 4) + 3] << 24;
2683 IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2687 /* Set configured hashing protocols in MRQC register */
2688 rss_hf = rss_conf->rss_hf;
2689 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2690 if (rss_hf & ETH_RSS_IPV4)
2691 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2692 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2693 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2694 if (rss_hf & ETH_RSS_IPV6)
2695 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2696 if (rss_hf & ETH_RSS_IPV6_EX)
2697 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2698 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2699 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2700 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2701 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2702 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2703 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2704 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2705 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2706 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2707 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2708 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2712 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2713 struct rte_eth_rss_conf *rss_conf)
2715 struct ixgbe_hw *hw;
2720 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2722 if (!ixgbe_rss_update_sp(hw->mac.type)) {
2723 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2727 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2730 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2731 * "RSS enabling cannot be done dynamically while it must be
2732 * preceded by a software reset"
2733 * Before changing anything, first check that the update RSS operation
2734 * does not attempt to disable RSS, if RSS was enabled at
2735 * initialization time, or does not attempt to enable RSS, if RSS was
2736 * disabled at initialization time.
2738 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2739 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2740 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2741 if (rss_hf != 0) /* Enable RSS */
2743 return 0; /* Nothing to do */
2746 if (rss_hf == 0) /* Disable RSS */
2748 ixgbe_hw_rss_hash_set(hw, rss_conf);
2753 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2754 struct rte_eth_rss_conf *rss_conf)
2756 struct ixgbe_hw *hw;
2765 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2766 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2767 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2768 hash_key = rss_conf->rss_key;
2769 if (hash_key != NULL) {
2770 /* Return RSS hash key */
2771 for (i = 0; i < 10; i++) {
2772 rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
2773 hash_key[(i * 4)] = rss_key & 0x000000FF;
2774 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2775 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2776 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2780 /* Get RSS functions configured in MRQC register */
2781 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2782 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2783 rss_conf->rss_hf = 0;
2787 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2788 rss_hf |= ETH_RSS_IPV4;
2789 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2790 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2791 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2792 rss_hf |= ETH_RSS_IPV6;
2793 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2794 rss_hf |= ETH_RSS_IPV6_EX;
2795 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2796 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2797 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2798 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2799 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2800 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2801 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2802 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2803 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2804 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2805 rss_conf->rss_hf = rss_hf;
2810 ixgbe_rss_configure(struct rte_eth_dev *dev)
2812 struct rte_eth_rss_conf rss_conf;
2813 struct ixgbe_hw *hw;
2817 uint16_t sp_reta_size;
2820 PMD_INIT_FUNC_TRACE();
2821 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2823 sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
2826 * Fill in redirection table
2827 * The byte-swap is needed because NIC registers are in
2828 * little-endian order.
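 *
 * Editorial note (illustrative): each 32-bit RETA register packs four
 * one-byte queue indices, so the loop below accumulates entries with
 * (reta << 8) | j and writes the byte-swapped value out once four
 * entries have been gathered.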
2831 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
2832 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
2834 if (j == dev->data->nb_rx_queues)
2836 reta = (reta << 8) | j;
2838 IXGBE_WRITE_REG(hw, reta_reg,
2843 * Configure the RSS key and the RSS protocols used to compute
2844 * the RSS hash of input packets.
2846 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2847 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2848 ixgbe_rss_disable(dev);
2851 if (rss_conf.rss_key == NULL)
2852 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2853 ixgbe_hw_rss_hash_set(hw, &rss_conf);
2856 #define NUM_VFTA_REGISTERS 128
2857 #define NIC_RX_BUFFER_SIZE 0x200
2858 #define X550_RX_BUFFER_SIZE 0x180
2861 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2863 struct rte_eth_vmdq_dcb_conf *cfg;
2864 struct ixgbe_hw *hw;
2865 enum rte_eth_nb_pools num_pools;
2866 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2868 uint8_t nb_tcs; /* number of traffic classes */
2871 PMD_INIT_FUNC_TRACE();
2872 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2873 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2874 num_pools = cfg->nb_queue_pools;
2875 /* Check we have a valid number of pools */
2876 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2877 ixgbe_rss_disable(dev);
2880 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2881 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2885 * split rx buffer up into sections, each for 1 traffic class
2887 switch (hw->mac.type) {
2888 case ixgbe_mac_X550:
2889 case ixgbe_mac_X550EM_x:
2890 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
2893 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2896 for (i = 0 ; i < nb_tcs; i++) {
2897 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2898 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2899 /* clear 10 bits. */
2900 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2901 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2903 /* zero alloc all unused TCs */
2904 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2905 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2906 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2907 /* clear 10 bits. */
2908 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2911 /* MRQC: enable vmdq and dcb */
2912 mrqc = ((num_pools == ETH_16_POOLS) ? \
2913 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2914 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2916 /* PFVTCTL: turn on virtualisation and set the default pool */
2917 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2918 if (cfg->enable_default_pool) {
2919 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2921 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2924 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2926 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2928 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2930 * mapping is done with 3 bits per priority,
2931 * so shift by i*3 each time
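 * (editorial example: a priority 2 mapped to TC 3 contributes 0x3 << 6)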
2933 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2935 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2937 /* RTRPCS: DCB related */
2938 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2940 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2941 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2942 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2943 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2945 /* VFTA - enable all vlan filters */
2946 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2947 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2950 /* VFRE: pool enabling for receive - 16 or 32 */
2951 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2952 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2955 * MPSAR - allow pools to read specific mac addresses
2956 * In this case, all pools should be able to read from mac addr 0
2958 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2959 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2961 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2962 for (i = 0; i < cfg->nb_pool_maps; i++) {
2963 /* set vlan id in VF register and set the valid bit */
2964 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2965 (cfg->pool_map[i].vlan_id & 0xFFF)));
2967 * Put the allowed pools in VFB reg. As we only have 16 or 32
2968 * pools, we only need to use the first half of the register
2971 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2976 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2977 * @hw: pointer to hardware structure
2978 * @dcb_config: pointer to ixgbe_dcb_config structure
2981 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2982 struct ixgbe_dcb_config *dcb_config)
2987 PMD_INIT_FUNC_TRACE();
2988 if (hw->mac.type != ixgbe_mac_82598EB) {
2989 /* Disable the Tx desc arbiter so that MTQC can be changed */
2990 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2991 reg |= IXGBE_RTTDCS_ARBDIS;
2992 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2994 /* Enable DCB for Tx with 8 TCs */
2995 if (dcb_config->num_tcs.pg_tcs == 8) {
2996 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2999 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3001 if (dcb_config->vt_mode)
3002 reg |= IXGBE_MTQC_VT_ENA;
3003 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3005 /* Disable drop for all queues */
3006 for (q = 0; q < 128; q++)
3007 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3008 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3010 /* Enable the Tx desc arbiter */
3011 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3012 reg &= ~IXGBE_RTTDCS_ARBDIS;
3013 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3015 /* Enable Security TX Buffer IFG for DCB */
3016 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3017 reg |= IXGBE_SECTX_DCB;
3018 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3024 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3025 * @dev: pointer to rte_eth_dev structure
3026 * @dcb_config: pointer to ixgbe_dcb_config structure
3029 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3030 struct ixgbe_dcb_config *dcb_config)
3032 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3033 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3034 struct ixgbe_hw *hw =
3035 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3037 PMD_INIT_FUNC_TRACE();
3038 if (hw->mac.type != ixgbe_mac_82598EB)
3039 /*PF VF Transmit Enable*/
3040 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3041 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3043 /*Configure general DCB TX parameters*/
3044 ixgbe_dcb_tx_hw_config(hw,dcb_config);
3049 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3050 struct ixgbe_dcb_config *dcb_config)
3052 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3053 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3054 struct ixgbe_dcb_tc_config *tc;
3057 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3058 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
3059 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3060 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3063 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3064 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3066 /* User Priority to Traffic Class mapping */
3067 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3068 j = vmdq_rx_conf->dcb_tc[i];
3069 tc = &dcb_config->tc_config[j];
3070 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3076 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3077 struct ixgbe_dcb_config *dcb_config)
3079 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3080 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3081 struct ixgbe_dcb_tc_config *tc;
3084 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3085 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
3086 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3087 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3090 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3091 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3094 /* User Priority to Traffic Class mapping */
3095 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3096 j = vmdq_tx_conf->dcb_tc[i];
3097 tc = &dcb_config->tc_config[j];
3098 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3105 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3106 struct ixgbe_dcb_config *dcb_config)
3108 struct rte_eth_dcb_rx_conf *rx_conf =
3109 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3110 struct ixgbe_dcb_tc_config *tc;
3113 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3114 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3116 /* User Priority to Traffic Class mapping */
3117 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3118 j = rx_conf->dcb_tc[i];
3119 tc = &dcb_config->tc_config[j];
3120 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3126 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3127 struct ixgbe_dcb_config *dcb_config)
3129 struct rte_eth_dcb_tx_conf *tx_conf =
3130 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3131 struct ixgbe_dcb_tc_config *tc;
3134 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3135 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3137 /* User Priority to Traffic Class mapping */
3138 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3139 j = tx_conf->dcb_tc[i];
3140 tc = &dcb_config->tc_config[j];
3141 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3147 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3148 * @hw: pointer to hardware structure
3149 * @dcb_config: pointer to ixgbe_dcb_config structure
3152 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3153 struct ixgbe_dcb_config *dcb_config)
3159 PMD_INIT_FUNC_TRACE();
3161 * Disable the arbiter before changing parameters
3162 * (always enable recycle mode; WSP)
3164 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3165 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3167 if (hw->mac.type != ixgbe_mac_82598EB) {
3168 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3169 if (dcb_config->num_tcs.pg_tcs == 4) {
3170 if (dcb_config->vt_mode)
3171 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3172 IXGBE_MRQC_VMDQRT4TCEN;
3174 /* no matter whether the mode is DCB or DCB_RSS, just
3175 * set the MRQE to RSSXTCEN. RSS is controlled
3178 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3179 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3180 IXGBE_MRQC_RTRSS4TCEN;
3183 if (dcb_config->num_tcs.pg_tcs == 8) {
3184 if (dcb_config->vt_mode)
3185 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3186 IXGBE_MRQC_VMDQRT8TCEN;
3188 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3189 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3190 IXGBE_MRQC_RTRSS8TCEN;
3194 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3197 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3198 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3199 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3200 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3202 /* VFTA - enable all vlan filters */
3203 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3204 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3208 * Configure Rx packet plane (recycle mode; WSP) and
3211 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3212 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3218 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3219 uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3221 switch (hw->mac.type) {
3222 case ixgbe_mac_82598EB:
3223 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3225 case ixgbe_mac_82599EB:
3226 case ixgbe_mac_X540:
3227 case ixgbe_mac_X550:
3228 case ixgbe_mac_X550EM_x:
3229 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3238 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3239 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3241 switch (hw->mac.type) {
3242 case ixgbe_mac_82598EB:
3243 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
3244 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
3246 case ixgbe_mac_82599EB:
3247 case ixgbe_mac_X540:
3248 case ixgbe_mac_X550:
3249 case ixgbe_mac_X550EM_x:
3250 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
3251 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
3258 #define DCB_RX_CONFIG 1
3259 #define DCB_TX_CONFIG 1
3260 #define DCB_TX_PB 1024
3262 * ixgbe_dcb_hw_configure - Enable DCB and configure
3263 * general DCB in VT mode and non-VT mode parameters
3264 * @dev: pointer to rte_eth_dev structure
3265 * @dcb_config: pointer to ixgbe_dcb_config structure
3268 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3269 struct ixgbe_dcb_config *dcb_config)
3272 uint8_t i,pfc_en,nb_tcs;
3273 uint16_t pbsize, rx_buffer_size;
3274 uint8_t config_dcb_rx = 0;
3275 uint8_t config_dcb_tx = 0;
3276 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3277 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3278 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3279 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3280 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3281 struct ixgbe_dcb_tc_config *tc;
3282 uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3283 struct ixgbe_hw *hw =
3284 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3286 switch(dev->data->dev_conf.rxmode.mq_mode){
3287 case ETH_MQ_RX_VMDQ_DCB:
3288 dcb_config->vt_mode = true;
3289 if (hw->mac.type != ixgbe_mac_82598EB) {
3290 config_dcb_rx = DCB_RX_CONFIG;
3292 * get DCB and VT RX configuration parameters
3295 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3296 /*Configure general VMDQ and DCB RX parameters*/
3297 ixgbe_vmdq_dcb_configure(dev);
3301 case ETH_MQ_RX_DCB_RSS:
3302 dcb_config->vt_mode = false;
3303 config_dcb_rx = DCB_RX_CONFIG;
3304 /* Get DCB RX configuration parameters from rte_eth_conf */
3305 ixgbe_dcb_rx_config(dev, dcb_config);
3306 /*Configure general DCB RX parameters*/
3307 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3310 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3313 switch (dev->data->dev_conf.txmode.mq_mode) {
3314 case ETH_MQ_TX_VMDQ_DCB:
3315 dcb_config->vt_mode = true;
3316 config_dcb_tx = DCB_TX_CONFIG;
3317 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3318 ixgbe_dcb_vt_tx_config(dev,dcb_config);
3319 /*Configure general VMDQ and DCB TX parameters*/
3320 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
3324 dcb_config->vt_mode = false;
3325 config_dcb_tx = DCB_TX_CONFIG;
3326 /*get DCB TX configuration parameters from rte_eth_conf*/
3327 ixgbe_dcb_tx_config(dev, dcb_config);
3328 /*Configure general DCB TX parameters*/
3329 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3332 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3336 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3338 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3339 if(nb_tcs == ETH_4_TCS) {
3340 /* Avoid un-configured priority mapping to TC0 */
3342 uint8_t mask = 0xFF;
3343 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3344 mask = (uint8_t)(mask & (~ (1 << map[i])));
3345 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3346 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3350 /* Re-configure 4 TCs BW */
3351 for (i = 0; i < nb_tcs; i++) {
3352 tc = &dcb_config->tc_config[i];
3353 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3354 (uint8_t)(100 / nb_tcs);
3355 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3356 (uint8_t)(100 / nb_tcs);
3358 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3359 tc = &dcb_config->tc_config[i];
3360 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3361 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3365 switch (hw->mac.type) {
3366 case ixgbe_mac_X550:
3367 case ixgbe_mac_X550EM_x:
3368 rx_buffer_size = X550_RX_BUFFER_SIZE;
3371 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3376 /* Set RX buffer size */
3377 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3378 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3379 for (i = 0 ; i < nb_tcs; i++) {
3380 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3382 /* zero alloc all unused TCs */
3383 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3384 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3388 /* Only support an equally distributed Tx packet buffer strategy. */
3389 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3390 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3391 for (i = 0; i < nb_tcs; i++) {
3392 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3393 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3395 /* Clear unused TCs, if any, to zero buffer size*/
3396 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3397 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3398 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3402 /*Calculates traffic class credits*/
3403 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3404 IXGBE_DCB_TX_CONFIG);
3405 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
3406 IXGBE_DCB_RX_CONFIG);
3409 /* Unpack CEE standard containers */
3410 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3411 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3412 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3413 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3414 /* Configure PG(ETS) RX */
3415 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
3419 /* Unpack CEE standard containers */
3420 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3421 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3422 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3423 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3424 /* Configure PG(ETS) TX */
3425 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
3428 /*Configure queue statistics registers*/
3429 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3431 /* Check if the PFC is supported */
3432 if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3433 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3434 for (i = 0; i < nb_tcs; i++) {
3436 * If the TC count is 8, the default high_water is 48 and
3437 * the default low_water is 16.
3439 hw->fc.high_water[i] = (pbsize * 3 ) / 4;
3440 hw->fc.low_water[i] = pbsize / 4;
3441 /* Enable pfc for this TC */
3442 tc = &dcb_config->tc_config[i];
3443 tc->pfc = ixgbe_dcb_pfc_enabled;
3445 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3446 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3448 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3455 * ixgbe_configure_dcb - Configure DCB Hardware
3456 * @dev: pointer to rte_eth_dev
3458 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3460 struct ixgbe_dcb_config *dcb_cfg =
3461 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3462 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3464 PMD_INIT_FUNC_TRACE();
3466 /* check that the mq_mode supports DCB */
3467 if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3468 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3469 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3472 if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3475 /** Configure DCB hardware **/
3476 ixgbe_dcb_hw_configure(dev, dcb_cfg);
3482 * VMDq is only supported on 10 GbE NICs.
3485 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3487 struct rte_eth_vmdq_rx_conf *cfg;
3488 struct ixgbe_hw *hw;
3489 enum rte_eth_nb_pools num_pools;
3490 uint32_t mrqc, vt_ctl, vlanctrl;
3494 PMD_INIT_FUNC_TRACE();
3495 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3496 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3497 num_pools = cfg->nb_queue_pools;
3499 ixgbe_rss_disable(dev);
3501 /* MRQC: enable vmdq */
3502 mrqc = IXGBE_MRQC_VMDQEN;
3503 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3505 /* PFVTCTL: turn on virtualisation and set the default pool */
3506 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3507 if (cfg->enable_default_pool)
3508 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3510 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3512 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3514 for (i = 0; i < (int)num_pools; i++) {
3515 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3516 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3519 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3520 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3521 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
3522 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3524 /* VFTA - enable all vlan filters */
3525 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3526 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3528 /* VFRE: pool enabling for receive - 64 */
3529 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3530 if (num_pools == ETH_64_POOLS)
3531 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3534 * MPSAR - allow pools to read specific mac addresses
3535 * In this case, all pools should be able to read from mac addr 0
3537 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3538 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3540 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3541 for (i = 0; i < cfg->nb_pool_maps; i++) {
3542 /* set vlan id in VF register and set the valid bit */
3543 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3544 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3546 * Put the allowed pools in VFB reg. As we only have 16 or 64
3547 * pools, we only need to use the first half of the register
3550 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3551 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3552 (cfg->pool_map[i].pools & UINT32_MAX));
3554 IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3555 ((cfg->pool_map[i].pools >> 32) \
3560 /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3561 if (cfg->enable_loop_back) {
3562 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3563 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3564 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3567 IXGBE_WRITE_FLUSH(hw);
3571 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3572 * @hw: pointer to hardware structure
3575 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3580 PMD_INIT_FUNC_TRACE();
3581 /*PF VF Transmit Enable*/
3582 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3583 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3585 /* Disable the Tx desc arbiter so that MTQC can be changed */
3586 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3587 reg |= IXGBE_RTTDCS_ARBDIS;
3588 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3590 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3591 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3593 /* Disable drop for all queues */
3594 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3595 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3596 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3598 /* Enable the Tx desc arbiter */
3599 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3600 reg &= ~IXGBE_RTTDCS_ARBDIS;
3601 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3603 IXGBE_WRITE_FLUSH(hw);
3608 static int __attribute__((cold))
3609 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3611 struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3615 /* Initialize software ring entries */
3616 for (i = 0; i < rxq->nb_rx_desc; i++) {
3617 volatile union ixgbe_adv_rx_desc *rxd;
3618 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3620 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3621 (unsigned) rxq->queue_id);
3625 rte_mbuf_refcnt_set(mbuf, 1);
3627 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3629 mbuf->port = rxq->port_id;
3632 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3633 rxd = &rxq->rx_ring[i];
3634 rxd->read.hdr_addr = 0;
3635 rxd->read.pkt_addr = dma_addr;
3643 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3645 struct ixgbe_hw *hw;
3648 ixgbe_rss_configure(dev);
3650 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3652 /* MRQC: enable VF RSS */
3653 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3654 mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3655 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3657 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3661 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3665 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3669 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3675 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3677 struct ixgbe_hw *hw =
3678 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3680 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3682 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3687 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3688 IXGBE_MRQC_VMDQRT4TCEN);
3692 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3693 IXGBE_MRQC_VMDQRT8TCEN);
3697 "invalid pool number in IOV mode");
3704 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3706 struct ixgbe_hw *hw =
3707 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3709 if (hw->mac.type == ixgbe_mac_82598EB)
3712 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3714 * SRIOV inactive scheme
3715 * any DCB/RSS w/o VMDq multi-queue setting
3717 switch (dev->data->dev_conf.rxmode.mq_mode) {
3719 case ETH_MQ_RX_DCB_RSS:
3720 case ETH_MQ_RX_VMDQ_RSS:
3721 ixgbe_rss_configure(dev);
3724 case ETH_MQ_RX_VMDQ_DCB:
3725 ixgbe_vmdq_dcb_configure(dev);
3728 case ETH_MQ_RX_VMDQ_ONLY:
3729 ixgbe_vmdq_rx_hw_configure(dev);
3732 case ETH_MQ_RX_NONE:
3734 /* if mq_mode is none, disable rss mode.*/
3735 ixgbe_rss_disable(dev);
3740 * SRIOV active scheme
3741 * Support RSS together with VMDq & SRIOV
3743 switch (dev->data->dev_conf.rxmode.mq_mode) {
3745 case ETH_MQ_RX_VMDQ_RSS:
3746 ixgbe_config_vf_rss(dev);
3749 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3750 case ETH_MQ_RX_VMDQ_DCB:
3751 case ETH_MQ_RX_VMDQ_DCB_RSS:
3753 "Could not support DCB with VMDq & SRIOV");
3756 ixgbe_config_vf_default(dev);
3765 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3767 struct ixgbe_hw *hw =
3768 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3772 if (hw->mac.type == ixgbe_mac_82598EB)
3775 /* disable arbiter before setting MTQC */
3776 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3777 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3778 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3780 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3782 * SRIOV inactive scheme
3783 * any DCB w/o VMDq multi-queue setting
3785 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3786 ixgbe_vmdq_tx_hw_configure(hw);
3788 mtqc = IXGBE_MTQC_64Q_1PB;
3789 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3792 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3795 * SRIOV active scheme
3796 * FIXME if support DCB together with VMDq & SRIOV
3799 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3802 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3805 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3809 mtqc = IXGBE_MTQC_64Q_1PB;
3810 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3812 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3815 /* re-enable arbiter */
3816 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3817 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3823 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
3825 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
3826 * spec rev. 3.0 chapter 8.2.3.8.13.
3828 * @pool Memory pool of the Rx queue
3830 static inline uint32_t
3831 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3833 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3835 /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
3838 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3841 return IXGBE_RSCCTL_MAXDESC_16;
3842 else if (maxdesc >= 8)
3843 return IXGBE_RSCCTL_MAXDESC_8;
3844 else if (maxdesc >= 4)
3845 return IXGBE_RSCCTL_MAXDESC_4;
3847 return IXGBE_RSCCTL_MAXDESC_1;
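/*
 * Editorial example (illustrative only, assuming maxdesc above is derived
 * as (64 KB - 1) / per-buffer data size, per the constraint comment):
 * with a 2048-byte mbuf data room and 128 bytes of headroom each buffer
 * carries 1920 bytes of data, 65535 / 1920 = 34 >= 16, so such a pool
 * yields IXGBE_RSCCTL_MAXDESC_16.
 */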
3851 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
3854 * (Taken from FreeBSD tree)
3855 * (yes this is all very magic and confusing :)
3858 * @entry the register array entry
3859 * @vector the MSIX vector for this queue
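 *
 * Editorial summary of the layout handled below (illustrative): on
 * 82599/X540 each IVAR register covers two queues; the RX entry for
 * queue n sits in IVAR(n >> 1) at bit offset 16 * (n & 1), the TX entry
 * 8 bits higher, and IXGBE_IVAR_ALLOC_VAL marks the entry as valid.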
3863 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
3865 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3868 vector |= IXGBE_IVAR_ALLOC_VAL;
3870 switch (hw->mac.type) {
3872 case ixgbe_mac_82598EB:
3874 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3876 entry += (type * 64);
3877 index = (entry >> 2) & 0x1F;
3878 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3879 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3880 ivar |= (vector << (8 * (entry & 0x3)));
3881 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3884 case ixgbe_mac_82599EB:
3885 case ixgbe_mac_X540:
3886 if (type == -1) { /* MISC IVAR */
3887 index = (entry & 1) * 8;
3888 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3889 ivar &= ~(0xFF << index);
3890 ivar |= (vector << index);
3891 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3892 } else { /* RX/TX IVARS */
3893 index = (16 * (entry & 1)) + (8 * type);
3894 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3895 ivar &= ~(0xFF << index);
3896 ivar |= (vector << index);
3897 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3907 void __attribute__((cold))
3908 ixgbe_set_rx_function(struct rte_eth_dev *dev)
3910 uint16_t i, rx_using_sse;
3911 struct ixgbe_adapter *adapter =
3912 (struct ixgbe_adapter *)dev->data->dev_private;
3915 * In order to allow Vector Rx there are a few configuration
3916 * conditions to be met and Rx Bulk Allocation should be allowed.
3918 if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
3919 !adapter->rx_bulk_alloc_allowed) {
3920 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
3921 "preconditions or RTE_IXGBE_INC_VECTOR is "
3923 dev->data->port_id);
3925 adapter->rx_vec_allowed = false;
3929 * Initialize the appropriate LRO callback.
3931 * If all queues satisfy the bulk allocation preconditions
3932 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
3933 * Otherwise use a single allocation version.
3935 if (dev->data->lro) {
3936 if (adapter->rx_bulk_alloc_allowed) {
3937 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3938 "allocation version");
3939 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3941 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3942 "allocation version");
3943 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3945 } else if (dev->data->scattered_rx) {
3947 * Set the non-LRO scattered callback: there are Vector and
3948 * single allocation versions.
3950 if (adapter->rx_vec_allowed) {
3951 PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
3952 "callback (port=%d).",
3953 dev->data->port_id);
3955 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3956 } else if (adapter->rx_bulk_alloc_allowed) {
3957 PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
3958 "bulk allocation (port=%d).",
3959 dev->data->port_id);
3960 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
3962 PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
3963 "single allocation) "
3964 "Scattered Rx callback "
3966 dev->data->port_id);
3968 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
3971 * Below we set "simple" callbacks according to port/queues parameters.
3972 * If parameters allow we are going to choose between the following
3976 * - Single buffer allocation (the simplest one)
3978 } else if (adapter->rx_vec_allowed) {
3979 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure the RX "
3980 "burst size is no less than %d (port=%d).",
3981 RTE_IXGBE_DESCS_PER_LOOP,
3982 dev->data->port_id);
3984 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
3985 } else if (adapter->rx_bulk_alloc_allowed) {
3986 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3987 "satisfied. Rx Burst Bulk Alloc function "
3988 "will be used on port=%d.",
3989 dev->data->port_id);
3991 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
3993 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3994 "satisfied, or Scattered Rx is requested "
3996 dev->data->port_id);
3998 dev->rx_pkt_burst = ixgbe_recv_pkts;
4001 /* Propagate information about RX function choice through all queues. */
4004 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4005 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4007 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4008 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4009 rxq->rx_using_sse = rx_using_sse;
4014 * ixgbe_set_rsc - configure RSC related port HW registers
4016 * Configures the port's RSC related registers according to chapter 4.6.7.2
4017 * of the 82599 Spec (x540 configuration is virtually the same).
4021 * Returns 0 in case of success or a non-zero error code
4024 ixgbe_set_rsc(struct rte_eth_dev *dev)
4026 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4027 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4028 struct rte_eth_dev_info dev_info = { 0 };
4029 bool rsc_capable = false;
4034 dev->dev_ops->dev_infos_get(dev, &dev_info);
4035 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4038 if (!rsc_capable && rx_conf->enable_lro) {
4039 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4044 /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4046 if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4048 * According to chapter 4.6.7.2.1 of the Spec Rev.
4049 * 3.0, RSC requires HW CRC stripping to be
4050 * enabled. If the user requested both HW CRC stripping off
4051 * and RSC on, return an error.
4053 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4058 /* RFCTL configuration */
4060 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4061 if (rx_conf->enable_lro)
4063 * Since NFS packet coalescing is not supported, clear
4064 * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4067 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4068 IXGBE_RFCTL_NFSR_DIS);
4070 rfctl |= IXGBE_RFCTL_RSC_DIS;
4072 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4075 /* If LRO hasn't been requested - we are done here. */
4076 if (!rx_conf->enable_lro)
4079 /* Set RDRXCTL.RSCACKC bit */
4080 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4081 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4082 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4084 /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4085 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4086 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4088 IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4090 IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4092 IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4094 IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4097 * ixgbe PMD doesn't support header-split at the moment.
4099 * Following chapter 4.6.7.2.1 of the 82599/x540
4100 * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4101 * should be configured even if header split is not
4102 * enabled. We will configure it to 128 bytes following the
4103 * recommendation in the spec.
4105 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4106 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4107 IXGBE_SRRCTL_BSIZEHDR_MASK;
4110 * TODO: Consider setting the Receive Descriptor Minimum
4111 * Threshold Size for the RSC case. This is not an obviously
4112 * beneficial option, but one worth considering...
4115 rscctl |= IXGBE_RSCCTL_RSCEN;
4116 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4117 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4120 * RSC: Set ITR interval corresponding to 2K ints/s.
4122 * Full-sized RSC aggregations on a 10Gb/s link will
4123 * arrive at a rate of about 20K aggregations/s.
4125 * A 2K ints/s rate will cause only 10% of the
4126 * aggregations to be closed by interrupt timer
4127 * expiration when streaming at wire speed.
4129 * For a sparse streaming case this setting yields
4130 * at most 500us of latency for a single RSC aggregation.
4132 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4133 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
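/*
 * Worked example (illustrative note, added here): IXGBE_EITR_INTERVAL_US(500)
 * programs a ~500us minimum interrupt interval, i.e. at most
 * 1,000,000us / 500us = 2000 interrupts/s. With roughly 20K full-sized
 * aggregations/s at 10Gb/s, the timer can therefore close at most
 * 2K / 20K = 10% of the aggregations, matching the reasoning above.
 */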
4135 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4136 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4137 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4138 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4141 * RSC requires the mapping of the queue to the interrupt vector.
4144 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4149 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4155 * Initializes Receive Unit.
4157 int __attribute__((cold))
4158 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4160 struct ixgbe_hw *hw;
4161 struct ixgbe_rx_queue *rxq;
4172 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4175 PMD_INIT_FUNC_TRACE();
4176 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4179 * Make sure receives are disabled while setting
4180 * up the RX context (registers, descriptor rings, etc.).
4182 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4183 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4185 /* Enable receipt of broadcast frames */
4186 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4187 fctrl |= IXGBE_FCTRL_BAM;
4188 fctrl |= IXGBE_FCTRL_DPF;
4189 fctrl |= IXGBE_FCTRL_PMCF;
4190 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4193 * Configure CRC stripping, if any.
4195 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4196 if (rx_conf->hw_strip_crc)
4197 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4199 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4202 * Configure jumbo frame support, if any.
4204 if (rx_conf->jumbo_frame == 1) {
4205 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4206 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4207 maxfrs &= 0x0000FFFF;
4208 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4209 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4211 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4214 * If loopback mode is configured for 82599, set LPBK bit.
4216 if (hw->mac.type == ixgbe_mac_82599EB &&
4217 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4218 hlreg0 |= IXGBE_HLREG0_LPBK;
4220 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4222 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4224 /* Setup RX queues */
4225 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4226 rxq = dev->data->rx_queues[i];
4229 * Reset crc_len in case it was changed after queue setup by a
4230 * call to configure.
4232 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4234 /* Setup the Base and Length of the Rx Descriptor Rings */
4235 bus_addr = rxq->rx_ring_phys_addr;
4236 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4237 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4238 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4239 (uint32_t)(bus_addr >> 32));
4240 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4241 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4242 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4243 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4245 /* Configure the SRRCTL register */
4246 #ifdef RTE_HEADER_SPLIT_ENABLE
4248 * Configure Header Split
4250 if (rx_conf->header_split) {
4251 if (hw->mac.type == ixgbe_mac_82599EB) {
4252 /* Must setup the PSRTYPE register */
4254 psrtype = IXGBE_PSRTYPE_TCPHDR |
4255 IXGBE_PSRTYPE_UDPHDR |
4256 IXGBE_PSRTYPE_IPV4HDR |
4257 IXGBE_PSRTYPE_IPV6HDR;
4258 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4260 srrctl = ((rx_conf->split_hdr_size <<
4261 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4262 IXGBE_SRRCTL_BSIZEHDR_MASK);
4263 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4266 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4268 /* Set if packets are dropped when no descriptors are available */
4270 srrctl |= IXGBE_SRRCTL_DROP_EN;
4273 * Configure the RX buffer size in the BSIZEPACKET field of
4274 * the SRRCTL register of the queue.
4275 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
4278 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4279 RTE_PKTMBUF_HEADROOM);
4280 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4281 IXGBE_SRRCTL_BSIZEPKT_MASK);
4283 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4285 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4286 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4288 /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
4289 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4290 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4291 dev->data->scattered_rx = 1;
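/*
 * Worked example (illustrative, assumed pool geometry): with a mempool
 * created with a 2176-byte data room and RTE_PKTMBUF_HEADROOM = 128,
 * buf_size = 2176 - 128 = 2048. BSIZEPACKET is programmed as
 * 2048 >> 10 = 2 (i.e. 2 KB), and the read-back buf_size is 2 << 10 = 2048.
 * A typical 1518-byte max_rx_pkt_len plus 2 * 4 VLAN bytes (1526) fits,
 * so scattered Rx stays off; a 9000-byte jumbo frame would not fit and
 * would force dev->data->scattered_rx = 1 above.
 */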
4294 if (rx_conf->enable_scatter)
4295 dev->data->scattered_rx = 1;
4298 * Device configured with multiple RX queues.
4300 ixgbe_dev_mq_rx_configure(dev);
4303 * Setup the Checksum Register.
4304 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4305 * Enable IP/L4 checksum computation by hardware if requested to do so.
4307 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4308 rxcsum |= IXGBE_RXCSUM_PCSD;
4309 if (rx_conf->hw_ip_checksum)
4310 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4312 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4314 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4316 if (hw->mac.type == ixgbe_mac_82599EB ||
4317 hw->mac.type == ixgbe_mac_X540) {
4318 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4319 if (rx_conf->hw_strip_crc)
4320 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4322 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4323 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4324 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4327 rc = ixgbe_set_rsc(dev);
4331 ixgbe_set_rx_function(dev);
4337 * Initializes Transmit Unit.
4339 void __attribute__((cold))
4340 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4342 struct ixgbe_hw *hw;
4343 struct ixgbe_tx_queue *txq;
4349 PMD_INIT_FUNC_TRACE();
4350 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4352 /* Enable TX CRC (checksum offload requirement) and hw padding
4353 * (TSO requirement) */
4354 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4355 hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4356 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4358 /* Setup the Base and Length of the Tx Descriptor Rings */
4359 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4360 txq = dev->data->tx_queues[i];
4362 bus_addr = txq->tx_ring_phys_addr;
4363 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4364 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4365 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4366 (uint32_t)(bus_addr >> 32));
4367 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4368 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4369 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4370 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4371 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4374 * Disable Tx Head Writeback RO bit, since this hoses
4375 * bookkeeping if things aren't delivered in order.
4377 switch (hw->mac.type) {
4378 case ixgbe_mac_82598EB:
4379 txctrl = IXGBE_READ_REG(hw,
4380 IXGBE_DCA_TXCTRL(txq->reg_idx));
4381 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4382 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4386 case ixgbe_mac_82599EB:
4387 case ixgbe_mac_X540:
4388 case ixgbe_mac_X550:
4389 case ixgbe_mac_X550EM_x:
4391 txctrl = IXGBE_READ_REG(hw,
4392 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4393 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4394 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4400 /* Device configured with multiple TX queues. */
4401 ixgbe_dev_mq_tx_configure(dev);
4405 * Set up link for 82599 loopback mode Tx->Rx.
4407 static inline void __attribute__((cold))
4408 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4410 PMD_INIT_FUNC_TRACE();
4412 if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4413 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4415 PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4424 IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4425 ixgbe_reset_pipeline_82599(hw);
4427 hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4433 * Start Transmit and Receive Units.
4435 int __attribute__((cold))
4436 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4438 struct ixgbe_hw *hw;
4439 struct ixgbe_tx_queue *txq;
4440 struct ixgbe_rx_queue *rxq;
4447 PMD_INIT_FUNC_TRACE();
4448 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4450 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4451 txq = dev->data->tx_queues[i];
4452 /* Setup Transmit Threshold Registers */
4453 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4454 txdctl |= txq->pthresh & 0x7F;
4455 txdctl |= ((txq->hthresh & 0x7F) << 8);
4456 txdctl |= ((txq->wthresh & 0x7F) << 16);
4457 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
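/*
 * Worked example (illustrative, hypothetical threshold values):
 * TXDCTL packs PTHRESH into bits 6:0, HTHRESH into bits 14:8 and
 * WTHRESH into bits 22:16. For pthresh = 36, hthresh = 8, wthresh = 4
 * the value OR-ed in above would be 0x24 | (0x08 << 8) | (0x04 << 16)
 * = 0x00040824.
 */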
4460 if (hw->mac.type != ixgbe_mac_82598EB) {
4461 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4462 dmatxctl |= IXGBE_DMATXCTL_TE;
4463 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4466 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4467 txq = dev->data->tx_queues[i];
4468 if (!txq->tx_deferred_start) {
4469 ret = ixgbe_dev_tx_queue_start(dev, i);
4475 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4476 rxq = dev->data->rx_queues[i];
4477 if (!rxq->rx_deferred_start) {
4478 ret = ixgbe_dev_rx_queue_start(dev, i);
4484 /* Enable Receive engine */
4485 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4486 if (hw->mac.type == ixgbe_mac_82598EB)
4487 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4488 rxctrl |= IXGBE_RXCTRL_RXEN;
4489 hw->mac.ops.enable_rx_dma(hw, rxctrl);
4491 /* If loopback mode is enabled for 82599, set up the link accordingly */
4492 if (hw->mac.type == ixgbe_mac_82599EB &&
4493 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4494 ixgbe_setup_loopback_link_82599(hw);
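/*
 * Illustrative sketch (application side, not part of the driver): the
 * loopback path above is taken only when the port was configured with
 * lpbk_mode set before start. Port id and queue counts are placeholders.
 */
#if 0
struct rte_eth_conf port_conf = { 0 };

port_conf.lpbk_mode = IXGBE_LPBK_82599_TX_RX; /* 82599 Tx->Rx loopback */
rte_eth_dev_configure(0 /* port */, 1 /* rx queues */, 1 /* tx queues */,
		      &port_conf);
#endif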
4500 * Start Receive Units for specified queue.
4502 int __attribute__((cold))
4503 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4505 struct ixgbe_hw *hw;
4506 struct ixgbe_rx_queue *rxq;
4510 PMD_INIT_FUNC_TRACE();
4511 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4513 if (rx_queue_id < dev->data->nb_rx_queues) {
4514 rxq = dev->data->rx_queues[rx_queue_id];
4516 /* Allocate buffers for descriptor rings */
4517 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4518 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4522 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4523 rxdctl |= IXGBE_RXDCTL_ENABLE;
4524 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4526 /* Wait until RX Enable ready */
4527 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4530 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4531 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4533 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4536 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4537 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4538 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4546 * Stop Receive Units for specified queue.
4548 int __attribute__((cold))
4549 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4551 struct ixgbe_hw *hw;
4552 struct ixgbe_adapter *adapter =
4553 (struct ixgbe_adapter *)dev->data->dev_private;
4554 struct ixgbe_rx_queue *rxq;
4558 PMD_INIT_FUNC_TRACE();
4559 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4561 if (rx_queue_id < dev->data->nb_rx_queues) {
4562 rxq = dev->data->rx_queues[rx_queue_id];
4564 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4565 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4566 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4568 /* Wait until the RX queue is disabled */
4569 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4572 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4573 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4575 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4578 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4580 ixgbe_rx_queue_release_mbufs(rxq);
4581 ixgbe_reset_rx_queue(adapter, rxq);
4582 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4591 * Start Transmit Units for specified queue.
4593 int __attribute__((cold))
4594 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4596 struct ixgbe_hw *hw;
4597 struct ixgbe_tx_queue *txq;
4601 PMD_INIT_FUNC_TRACE();
4602 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4604 if (tx_queue_id < dev->data->nb_tx_queues) {
4605 txq = dev->data->tx_queues[tx_queue_id];
4606 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4607 txdctl |= IXGBE_TXDCTL_ENABLE;
4608 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4610 /* Wait until TX Enable ready */
4611 if (hw->mac.type == ixgbe_mac_82599EB) {
4612 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4615 txdctl = IXGBE_READ_REG(hw,
4616 IXGBE_TXDCTL(txq->reg_idx));
4617 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4619 PMD_INIT_LOG(ERR, "Could not enable "
4620 "Tx Queue %d", tx_queue_id);
4623 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4624 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4625 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4633 * Stop Transmit Units for specified queue.
4635 int __attribute__((cold))
4636 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4638 struct ixgbe_hw *hw;
4639 struct ixgbe_tx_queue *txq;
4641 uint32_t txtdh, txtdt;
4644 PMD_INIT_FUNC_TRACE();
4645 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4647 if (tx_queue_id < dev->data->nb_tx_queues) {
4648 txq = dev->data->tx_queues[tx_queue_id];
4650 /* Wait until TX queue is empty */
4651 if (hw->mac.type == ixgbe_mac_82599EB) {
4652 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4654 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4655 txtdh = IXGBE_READ_REG(hw,
4656 IXGBE_TDH(txq->reg_idx));
4657 txtdt = IXGBE_READ_REG(hw,
4658 IXGBE_TDT(txq->reg_idx));
4659 } while (--poll_ms && (txtdh != txtdt));
4661 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4662 "when stopping.", tx_queue_id);
4665 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4666 txdctl &= ~IXGBE_TXDCTL_ENABLE;
4667 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4669 /* Wait until the TX queue is disabled */
4670 if (hw->mac.type == ixgbe_mac_82599EB) {
4671 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4674 txdctl = IXGBE_READ_REG(hw,
4675 IXGBE_TXDCTL(txq->reg_idx));
4676 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4678 PMD_INIT_LOG(ERR, "Could not disable "
4679 "Tx Queue %d", tx_queue_id);
4682 if (txq->ops != NULL) {
4683 txq->ops->release_mbufs(txq);
4684 txq->ops->reset(txq);
4686 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4694 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4695 struct rte_eth_rxq_info *qinfo)
4697 struct ixgbe_rx_queue *rxq;
4699 rxq = dev->data->rx_queues[queue_id];
4701 qinfo->mp = rxq->mb_pool;
4702 qinfo->scattered_rx = dev->data->scattered_rx;
4703 qinfo->nb_desc = rxq->nb_rx_desc;
4705 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4706 qinfo->conf.rx_drop_en = rxq->drop_en;
4707 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4711 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4712 struct rte_eth_txq_info *qinfo)
4714 struct ixgbe_tx_queue *txq;
4716 txq = dev->data->tx_queues[queue_id];
4718 qinfo->nb_desc = txq->nb_tx_desc;
4720 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4721 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4722 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4724 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4725 qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4726 qinfo->conf.txq_flags = txq->txq_flags;
4727 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
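/*
 * Illustrative sketch (not part of the driver): applications normally
 * retrieve the values filled in above through the generic ethdev API.
 * The port and queue ids below are placeholders.
 */
#if 0
struct rte_eth_rxq_info rx_qinfo;
struct rte_eth_txq_info tx_qinfo;

/* Invokes the rxq_info_get / txq_info_get dev_ops shown above. */
rte_eth_rx_queue_info_get(0 /* port */, 0 /* queue */, &rx_qinfo);
rte_eth_tx_queue_info_get(0 /* port */, 0 /* queue */, &tx_qinfo);
#endif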
4731 * [VF] Initializes Receive Unit.
4733 int __attribute__((cold))
4734 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4736 struct ixgbe_hw *hw;
4737 struct ixgbe_rx_queue *rxq;
4739 uint32_t srrctl, psrtype = 0;
4744 PMD_INIT_FUNC_TRACE();
4745 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4747 if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4748 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
4749 "it should be power of 2");
4753 if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4754 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
4755 "it should be equal to or less than %d",
4756 hw->mac.max_rx_queues);
4761 * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4762 * disables VF packet receipt if the PF MTU is > 1500.
4763 * This is done to deal with an 82599 limitation that forces
4764 * the PF and all VFs to share the same MTU.
4765 * The PF driver re-enables VF packet receipt only when
4766 * the VF driver issues an IXGBE_VF_SET_LPE request.
4767 * In the meantime, the VF device cannot be used, even if the VF driver
4768 * and the Guest VM network stack are ready to accept packets with a
4769 * size up to the PF MTU.
4770 * As a work-around for this PF behaviour, force the call to
4771 * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4772 * VF packet reception works in all cases.
4774 ixgbevf_rlpml_set_vf(hw,
4775 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4777 /* Setup RX queues */
4778 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4779 rxq = dev->data->rx_queues[i];
4781 /* Allocate buffers for descriptor rings */
4782 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4786 /* Setup the Base and Length of the Rx Descriptor Rings */
4787 bus_addr = rxq->rx_ring_phys_addr;
4789 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4790 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4791 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4792 (uint32_t)(bus_addr >> 32));
4793 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4794 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4795 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4796 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4799 /* Configure the SRRCTL register */
4800 #ifdef RTE_HEADER_SPLIT_ENABLE
4802 * Configure Header Split
4804 if (dev->data->dev_conf.rxmode.header_split) {
4805 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4806 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4807 IXGBE_SRRCTL_BSIZEHDR_MASK);
4808 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4811 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4813 /* Set if packets are dropped when no descriptors are available */
4815 srrctl |= IXGBE_SRRCTL_DROP_EN;
4818 * Configure the RX buffer size in the BSIZEPACKET field of
4819 * the SRRCTL register of the queue.
4820 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
4823 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4824 RTE_PKTMBUF_HEADROOM);
4825 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4826 IXGBE_SRRCTL_BSIZEPKT_MASK);
4829 * VF modification to write virtual function SRRCTL register
4831 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4833 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4834 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4836 if (dev->data->dev_conf.rxmode.enable_scatter ||
4837 /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
4838 (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4839 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4840 if (!dev->data->scattered_rx)
4841 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4842 dev->data->scattered_rx = 1;
4846 #ifdef RTE_HEADER_SPLIT_ENABLE
4847 if (dev->data->dev_conf.rxmode.header_split)
4848 /* Must setup the PSRTYPE register */
4849 psrtype = IXGBE_PSRTYPE_TCPHDR |
4850 IXGBE_PSRTYPE_UDPHDR |
4851 IXGBE_PSRTYPE_IPV4HDR |
4852 IXGBE_PSRTYPE_IPV6HDR;
4855 /* Set RQPL for VF RSS according to the number of Rx queues */
4856 psrtype |= (dev->data->nb_rx_queues >> 1) <<
4857 IXGBE_PSRTYPE_RQPL_SHIFT;
4858 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
4860 ixgbe_set_rx_function(dev);
4866 * [VF] Initializes Transmit Unit.
4868 void __attribute__((cold))
4869 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4871 struct ixgbe_hw *hw;
4872 struct ixgbe_tx_queue *txq;
4877 PMD_INIT_FUNC_TRACE();
4878 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4880 /* Setup the Base and Length of the Tx Descriptor Rings */
4881 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4882 txq = dev->data->tx_queues[i];
4883 bus_addr = txq->tx_ring_phys_addr;
4884 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4885 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4886 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4887 (uint32_t)(bus_addr >> 32));
4888 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4889 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4890 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4891 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4892 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4895 * Disable Tx Head Writeback RO bit, since this hoses
4896 * bookkeeping if things aren't delivered in order.
4898 txctrl = IXGBE_READ_REG(hw,
4899 IXGBE_VFDCA_TXCTRL(i));
4900 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4901 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4907 * [VF] Start Transmit and Receive Units.
4909 void __attribute__((cold))
4910 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4912 struct ixgbe_hw *hw;
4913 struct ixgbe_tx_queue *txq;
4914 struct ixgbe_rx_queue *rxq;
4920 PMD_INIT_FUNC_TRACE();
4921 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4923 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4924 txq = dev->data->tx_queues[i];
4925 /* Setup Transmit Threshold Registers */
4926 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4927 txdctl |= txq->pthresh & 0x7F;
4928 txdctl |= ((txq->hthresh & 0x7F) << 8);
4929 txdctl |= ((txq->wthresh & 0x7F) << 16);
4930 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4933 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4935 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4936 txdctl |= IXGBE_TXDCTL_ENABLE;
4937 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4940 /* Wait until TX Enable ready */
4943 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4944 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4946 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4948 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4950 rxq = dev->data->rx_queues[i];
4952 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4953 rxdctl |= IXGBE_RXDCTL_ENABLE;
4954 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4956 /* Wait until RX Enable ready */
4960 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4961 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4963 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4965 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
4970 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
4971 int __attribute__((weak))
4972 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
4977 uint16_t __attribute__((weak))
4978 ixgbe_recv_pkts_vec(
4979 void __rte_unused *rx_queue,
4980 struct rte_mbuf __rte_unused **rx_pkts,
4981 uint16_t __rte_unused nb_pkts)
4986 uint16_t __attribute__((weak))
4987 ixgbe_recv_scattered_pkts_vec(
4988 void __rte_unused *rx_queue,
4989 struct rte_mbuf __rte_unused **rx_pkts,
4990 uint16_t __rte_unused nb_pkts)
4995 int __attribute__((weak))
4996 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)