4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_tailq.h>
58 #include <rte_per_lcore.h>
59 #include <rte_lcore.h>
60 #include <rte_atomic.h>
61 #include <rte_branch_prediction.h>
63 #include <rte_mempool.h>
64 #include <rte_malloc.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_prefetch.h>
72 #include <rte_string_fns.h>
73 #include <rte_errno.h>
75 #include "ixgbe_logs.h"
76 #include "ixgbe/ixgbe_api.h"
77 #include "ixgbe/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "ixgbe/ixgbe_dcb.h"
80 #include "ixgbe/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
83 #define IXGBE_RSS_OFFLOAD_ALL ( \
89 ETH_RSS_IPV6_TCP_EX | \
94 /* Bit mask indicating which bits are required for building the TX context */
95 #define IXGBE_TX_OFFLOAD_MASK ( \
101 static inline struct rte_mbuf *
102 rte_rxmbuf_alloc(struct rte_mempool *mp)
106 m = __rte_mbuf_raw_alloc(mp);
107 __rte_mbuf_sanity_check_raw(m, 0);
113 #define RTE_PMD_USE_PREFETCH
116 #ifdef RTE_PMD_USE_PREFETCH
118 * Prefetch a cache line into all cache levels.
120 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
122 #define rte_ixgbe_prefetch(p) do {} while(0)
125 /*********************************************************************
129 **********************************************************************/
132 * Check for descriptors with their DD bit set and free mbufs.
133 * Return the total number of buffers freed.
135 static inline int __attribute__((always_inline))
136 ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
138 struct igb_tx_entry *txep;
142 /* check DD bit on threshold descriptor */
143 status = txq->tx_ring[txq->tx_next_dd].wb.status;
144 if (! (status & IXGBE_ADVTXD_STAT_DD))
148 * first buffer to free from S/W ring is at index
149 * tx_next_dd - (tx_rs_thresh-1)
151 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
153 /* free buffers one at a time */
154 if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
155 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
156 txep->mbuf->next = NULL;
157 rte_mempool_put(txep->mbuf->pool, txep->mbuf);
161 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
162 rte_pktmbuf_free_seg(txep->mbuf);
167 /* buffers were freed, update counters */
168 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
169 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
170 if (txq->tx_next_dd >= txq->nb_tx_desc)
171 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
173 return txq->tx_rs_thresh;
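/*
 * Worked example (illustrative values, not from the original source): with
 * tx_rs_thresh = 32 and tx_next_dd = 31, the DD bit is checked on descriptor
 * 31; if it is set, the 32 mbufs at sw_ring[0..31] are freed in one pass,
 * nb_tx_free grows by 32 and tx_next_dd advances to 63.
 */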
176 /* Populate 4 descriptors with data from 4 mbufs */
178 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
180 uint64_t buf_dma_addr;
184 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
185 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
186 pkt_len = (*pkts)->data_len;
188 /* write data to descriptor */
189 txdp->read.buffer_addr = buf_dma_addr;
190 txdp->read.cmd_type_len =
191 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
192 txdp->read.olinfo_status =
193 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
194 rte_prefetch0(&(*pkts)->pool);
198 /* Populate 1 descriptor with data from 1 mbuf */
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
202 uint64_t buf_dma_addr;
205 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
206 pkt_len = (*pkts)->data_len;
208 /* write data to descriptor */
209 txdp->read.buffer_addr = buf_dma_addr;
210 txdp->read.cmd_type_len =
211 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212 txdp->read.olinfo_status =
213 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214 rte_prefetch0(&(*pkts)->pool);
218 * Fill H/W descriptor ring with mbuf data.
219 * Copy mbuf pointers to the S/W ring.
222 ixgbe_tx_fill_hw_ring(struct igb_tx_queue *txq, struct rte_mbuf **pkts,
225 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226 struct igb_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227 const int N_PER_LOOP = 4;
228 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229 int mainpart, leftover;
233 * Process most of the packets in chunks of N pkts. Any
234 * leftover packets will get processed one at a time.
236 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
238 for (i = 0; i < mainpart; i += N_PER_LOOP) {
239 /* Copy N mbuf pointers to the S/W ring */
240 for (j = 0; j < N_PER_LOOP; ++j) {
241 (txep + i + j)->mbuf = *(pkts + i + j);
243 tx4(txdp + i, pkts + i);
246 if (unlikely(leftover > 0)) {
247 for (i = 0; i < leftover; ++i) {
248 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249 tx1(txdp + mainpart + i, pkts + mainpart + i);
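/*
 * Illustrative example of the chunking arithmetic above (assumed values):
 * for nb_pkts = 11 and N_PER_LOOP = 4, mainpart = 11 & ~3 = 8 (two tx4()
 * calls) and leftover = 11 & 3 = 3 (three tx1() calls).
 */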
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
258 struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
259 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
263 * Begin scanning the H/W ring for done descriptors when the
264 * number of available descriptors drops below tx_free_thresh. For
265 * each done descriptor, free the associated buffer.
267 if (txq->nb_tx_free < txq->tx_free_thresh)
268 ixgbe_tx_free_bufs(txq);
270 /* Only use descriptors that are available */
271 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272 if (unlikely(nb_pkts == 0))
275 /* Use exactly nb_pkts descriptors */
276 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
279 * At this point, we know there are enough descriptors in the
280 * ring to transmit all the packets. This assumes that each
281 * mbuf contains a single segment, and that no new offloads
282 * are expected, which would require a new context descriptor.
286 * See if we're going to wrap-around. If so, handle the top
287 * of the descriptor ring first, then do the bottom. If not,
288 * the processing looks just like the "bottom" part anyway...
290 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
295 * We know that the last descriptor in the ring will need to
296 * have its RS bit set because tx_rs_thresh has to be
297 * a divisor of the ring size
299 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
306 /* Fill H/W descriptor ring with mbuf data */
307 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
311 * Determine if RS bit should be set
312 * This is what we actually want:
313 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314 * but instead of subtracting 1 and doing >=, we can just do
315 * greater than without subtracting.
317 if (txq->tx_tail > txq->tx_next_rs) {
318 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
322 if (txq->tx_next_rs >= txq->nb_tx_desc)
323 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
327 * Check for wrap-around. This would only happen if we used
328 * up to the last descriptor in the ring, no more, no less.
330 if (txq->tx_tail >= txq->nb_tx_desc)
333 /* update tail pointer */
335 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
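/*
 * Hedged example of the RS-bit bookkeeping above (numbers are illustrative
 * only): with nb_tx_desc = 512 and tx_rs_thresh = 32, tx_next_rs starts at
 * 31; once tx_tail moves past it, the RS bit is set on descriptor 31 and
 * tx_next_rs advances by 32, wrapping back to 31 after descriptor 511.
 */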
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
346 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
347 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
350 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
354 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
355 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
356 nb_tx = (uint16_t)(nb_tx + ret);
357 nb_pkts = (uint16_t)(nb_pkts - ret);
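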
366 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
367 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
368 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 uint32_t type_tucmd_mlhl;
371 uint32_t mss_l4len_idx = 0;
373 uint32_t vlan_macip_lens;
374 union ixgbe_tx_offload tx_offload_mask;
376 ctx_idx = txq->ctx_curr;
377 tx_offload_mask.data = 0;
380 /* Specify which HW CTX to upload. */
381 mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
383 if (ol_flags & PKT_TX_VLAN_PKT) {
384 tx_offload_mask.vlan_tci |= ~0;
387 /* check if TCP segmentation is required for this packet */
388 if (ol_flags & PKT_TX_TCP_SEG) {
389 /* implies IP cksum and TCP cksum */
390 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
391 IXGBE_ADVTXD_TUCMD_L4T_TCP |
392 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
394 tx_offload_mask.l2_len |= ~0;
395 tx_offload_mask.l3_len |= ~0;
396 tx_offload_mask.l4_len |= ~0;
397 tx_offload_mask.tso_segsz |= ~0;
398 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
399 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
400 } else { /* no TSO, check if hardware checksum is needed */
401 if (ol_flags & PKT_TX_IP_CKSUM) {
402 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
403 tx_offload_mask.l2_len |= ~0;
404 tx_offload_mask.l3_len |= ~0;
407 switch (ol_flags & PKT_TX_L4_MASK) {
408 case PKT_TX_UDP_CKSUM:
409 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
410 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
411 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
412 tx_offload_mask.l2_len |= ~0;
413 tx_offload_mask.l3_len |= ~0;
415 case PKT_TX_TCP_CKSUM:
416 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
417 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
419 tx_offload_mask.l2_len |= ~0;
420 tx_offload_mask.l3_len |= ~0;
421 tx_offload_mask.l4_len |= ~0;
423 case PKT_TX_SCTP_CKSUM:
424 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
425 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427 tx_offload_mask.l2_len |= ~0;
428 tx_offload_mask.l3_len |= ~0;
431 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
432 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
437 txq->ctx_cache[ctx_idx].flags = ol_flags;
438 txq->ctx_cache[ctx_idx].tx_offload.data =
439 tx_offload_mask.data & tx_offload.data;
440 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
442 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
443 vlan_macip_lens = tx_offload.l3_len;
444 vlan_macip_lens |= (tx_offload.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT);
445 vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
446 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
447 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
448 ctx_txd->seqnum_seed = 0;
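/*
 * Illustrative note (example values, not from the original source): for a
 * TSO packet with tso_segsz = 1448 and l4_len = 20, mss_l4len_idx encodes
 * (1448 << IXGBE_ADVTXD_MSS_SHIFT) | (20 << IXGBE_ADVTXD_L4LEN_SHIFT) plus
 * the context slot index, while vlan_macip_lens packs l3_len, l2_len and
 * the VLAN tag as shown above.
 */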
452 * Check which hardware context can be used. Use the existing match
453 * or create a new context descriptor.
455 static inline uint32_t
456 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
457 union ixgbe_tx_offload tx_offload)
459 /* If it matches the currently used context */
460 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
461 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
462 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
463 return txq->ctx_curr;
466 /* Check whether it matches the next context */
468 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
469 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
470 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
471 return txq->ctx_curr;
474 /* Mismatch: a new context descriptor has to be built */
475 return (IXGBE_CTX_NUM);
478 static inline uint32_t
479 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
482 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
483 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
484 if (ol_flags & PKT_TX_IP_CKSUM)
485 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
486 if (ol_flags & PKT_TX_TCP_SEG)
487 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
491 static inline uint32_t
492 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
494 uint32_t cmdtype = 0;
495 if (ol_flags & PKT_TX_VLAN_PKT)
496 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
497 if (ol_flags & PKT_TX_TCP_SEG)
498 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
502 /* Default RS bit threshold values */
503 #ifndef DEFAULT_TX_RS_THRESH
504 #define DEFAULT_TX_RS_THRESH 32
506 #ifndef DEFAULT_TX_FREE_THRESH
507 #define DEFAULT_TX_FREE_THRESH 32
510 /* Reset transmit descriptors after they have been used */
512 ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
514 struct igb_tx_entry *sw_ring = txq->sw_ring;
515 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
516 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
517 uint16_t nb_tx_desc = txq->nb_tx_desc;
518 uint16_t desc_to_clean_to;
519 uint16_t nb_tx_to_clean;
521 /* Determine the last descriptor needing to be cleaned */
522 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
523 if (desc_to_clean_to >= nb_tx_desc)
524 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
526 /* Check to make sure the last descriptor to clean is done */
527 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
528 if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD))
530 PMD_TX_FREE_LOG(DEBUG,
531 "TX descriptor %4u is not done"
532 "(port=%d queue=%d)",
534 txq->port_id, txq->queue_id);
535 /* Failed to clean any descriptors, better luck next time */
539 /* Figure out how many descriptors will be cleaned */
540 if (last_desc_cleaned > desc_to_clean_to)
541 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
544 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
547 PMD_TX_FREE_LOG(DEBUG,
548 "Cleaning %4u TX descriptors: %4u to %4u "
549 "(port=%d queue=%d)",
550 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
551 txq->port_id, txq->queue_id);
554 * The last descriptor to clean is done, so that means all the
555 * descriptors from the last descriptor that was cleaned
556 * up to the last descriptor with the RS bit set
557 * are done. Only reset the threshold descriptor.
559 txr[desc_to_clean_to].wb.status = 0;
561 /* Update the txq to reflect the last descriptor that was cleaned */
562 txq->last_desc_cleaned = desc_to_clean_to;
563 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
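/*
 * Worked example (illustrative, assuming sw_ring[20].last_id == 20): with
 * nb_tx_desc = 512, tx_rs_thresh = 32 and last_desc_cleaned = 500,
 * desc_to_clean_to = 532 - 512 = 20; if that descriptor reports DD,
 * nb_tx_to_clean = (512 - 500) + 20 = 32 descriptors are released.
 */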
570 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
573 struct igb_tx_queue *txq;
574 struct igb_tx_entry *sw_ring;
575 struct igb_tx_entry *txe, *txn;
576 volatile union ixgbe_adv_tx_desc *txr;
577 volatile union ixgbe_adv_tx_desc *txd;
578 struct rte_mbuf *tx_pkt;
579 struct rte_mbuf *m_seg;
580 uint64_t buf_dma_addr;
581 uint32_t olinfo_status;
582 uint32_t cmd_type_len;
593 union ixgbe_tx_offload tx_offload = { .data = 0 };
596 sw_ring = txq->sw_ring;
598 tx_id = txq->tx_tail;
599 txe = &sw_ring[tx_id];
601 /* Determine if the descriptor ring needs to be cleaned. */
602 if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
603 ixgbe_xmit_cleanup(txq);
606 rte_prefetch0(&txe->mbuf->pool);
609 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
612 pkt_len = tx_pkt->pkt_len;
615 * Determine how many (if any) context descriptors
616 * are needed for offload functionality.
618 ol_flags = tx_pkt->ol_flags;
620 /* If hardware offload required */
621 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
623 tx_offload.l2_len = tx_pkt->l2_len;
624 tx_offload.l3_len = tx_pkt->l3_len;
625 tx_offload.l4_len = tx_pkt->l4_len;
626 tx_offload.vlan_tci = tx_pkt->vlan_tci;
627 tx_offload.tso_segsz = tx_pkt->tso_segsz;
629 /* Decide whether a new context must be built or an existing one can be reused. */
630 ctx = what_advctx_update(txq, tx_ol_req,
632 /* Only allocate a context descriptor if required */
633 new_ctx = (ctx == IXGBE_CTX_NUM);
638 * Keep track of how many descriptors are used this loop
639 * This will always be the number of segments + the number of
640 * Context descriptors required to transmit the packet
642 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
645 * The number of descriptors that must be allocated for a
646 * packet is the number of segments of that packet, plus 1
647 * Context Descriptor for the hardware offload, if any.
648 * Determine the last TX descriptor to allocate in the TX ring
649 * for the packet, starting from the current position (tx_id)
652 tx_last = (uint16_t) (tx_id + nb_used - 1);
655 if (tx_last >= txq->nb_tx_desc)
656 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
658 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
659 " tx_first=%u tx_last=%u",
660 (unsigned) txq->port_id,
661 (unsigned) txq->queue_id,
667 * Make sure there are enough TX descriptors available to
668 * transmit the entire packet.
669 * nb_used better be less than or equal to txq->tx_rs_thresh
671 if (nb_used > txq->nb_tx_free) {
672 PMD_TX_FREE_LOG(DEBUG,
673 "Not enough free TX descriptors "
674 "nb_used=%4u nb_free=%4u "
675 "(port=%d queue=%d)",
676 nb_used, txq->nb_tx_free,
677 txq->port_id, txq->queue_id);
679 if (ixgbe_xmit_cleanup(txq) != 0) {
680 /* Could not clean any descriptors */
686 /* nb_used better be <= txq->tx_rs_thresh */
687 if (unlikely(nb_used > txq->tx_rs_thresh)) {
688 PMD_TX_FREE_LOG(DEBUG,
689 "The number of descriptors needed to "
690 "transmit the packet exceeds the "
691 "RS bit threshold. This will impact "
693 "nb_used=%4u nb_free=%4u "
695 "(port=%d queue=%d)",
696 nb_used, txq->nb_tx_free,
698 txq->port_id, txq->queue_id);
700 * Loop here until there are enough TX
701 * descriptors or until the ring cannot be
704 while (nb_used > txq->nb_tx_free) {
705 if (ixgbe_xmit_cleanup(txq) != 0) {
707 * Could not clean any
719 * By now there are enough free TX descriptors to transmit
724 * Set common flags of all TX Data Descriptors.
726 * The following bits must be set in all Data Descriptors:
727 * - IXGBE_ADVTXD_DTYP_DATA
728 * - IXGBE_ADVTXD_DCMD_DEXT
730 * The following bits must be set in the first Data Descriptor
731 * and are ignored in the other ones:
732 * - IXGBE_ADVTXD_DCMD_IFCS
733 * - IXGBE_ADVTXD_MAC_1588
734 * - IXGBE_ADVTXD_DCMD_VLE
736 * The following bits must only be set in the last Data
738 * - IXGBE_TXD_CMD_EOP
740 * The following bits can be set in any Data Descriptor, but
741 * are only set in the last Data Descriptor:
744 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
745 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
747 #ifdef RTE_LIBRTE_IEEE1588
748 if (ol_flags & PKT_TX_IEEE1588_TMST)
749 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
755 if (ol_flags & PKT_TX_TCP_SEG) {
756 /* when TSO is on, the paylen in the descriptor is
757 * not the packet length but the TCP payload length */
758 pkt_len -= (tx_offload.l2_len +
759 tx_offload.l3_len + tx_offload.l4_len);
763 * Setup the TX Advanced Context Descriptor if required
766 volatile struct ixgbe_adv_tx_context_desc *
769 ctx_txd = (volatile struct
770 ixgbe_adv_tx_context_desc *)
773 txn = &sw_ring[txe->next_id];
774 rte_prefetch0(&txn->mbuf->pool);
776 if (txe->mbuf != NULL) {
777 rte_pktmbuf_free_seg(txe->mbuf);
781 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
784 txe->last_id = tx_last;
785 tx_id = txe->next_id;
790 * Set up the TX Advanced Data Descriptor;
791 * this path is taken whether a new context
792 * descriptor was built or an existing one is reused.
794 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
795 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
796 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
799 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
804 txn = &sw_ring[txe->next_id];
805 rte_prefetch0(&txn->mbuf->pool);
807 if (txe->mbuf != NULL)
808 rte_pktmbuf_free_seg(txe->mbuf);
812 * Set up Transmit Data Descriptor.
814 slen = m_seg->data_len;
815 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
816 txd->read.buffer_addr =
817 rte_cpu_to_le_64(buf_dma_addr);
818 txd->read.cmd_type_len =
819 rte_cpu_to_le_32(cmd_type_len | slen);
820 txd->read.olinfo_status =
821 rte_cpu_to_le_32(olinfo_status);
822 txe->last_id = tx_last;
823 tx_id = txe->next_id;
826 } while (m_seg != NULL);
829 * The last packet data descriptor needs End Of Packet (EOP)
831 cmd_type_len |= IXGBE_TXD_CMD_EOP;
832 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
833 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
835 /* Set RS bit only on threshold packets' last descriptor */
836 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
837 PMD_TX_FREE_LOG(DEBUG,
838 "Setting RS bit on TXD id="
839 "%4u (port=%d queue=%d)",
840 tx_last, txq->port_id, txq->queue_id);
842 cmd_type_len |= IXGBE_TXD_CMD_RS;
844 /* Update txq RS bit counters */
847 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
853 * Set the Transmit Descriptor Tail (TDT)
855 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
856 (unsigned) txq->port_id, (unsigned) txq->queue_id,
857 (unsigned) tx_id, (unsigned) nb_tx);
858 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
859 txq->tx_tail = tx_id;
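/*
 * Illustrative accounting for the transmit loop above (example values only):
 * a 3-segment packet that also needs a fresh offload context consumes
 * nb_used = 3 + 1 = 4 descriptors, so tx_last advances by 4 (modulo the ring
 * size) and the RS bit is requested once nb_tx_used reaches tx_rs_thresh.
 */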
864 /*********************************************************************
868 **********************************************************************/
869 static inline uint64_t
870 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
874 static uint64_t ip_pkt_types_map[16] = {
875 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
876 PKT_RX_IPV6_HDR, 0, 0, 0,
877 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
878 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
881 static uint64_t ip_rss_types_map[16] = {
882 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
883 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
884 PKT_RX_RSS_HASH, 0, 0, 0,
885 0, 0, 0, PKT_RX_FDIR,
888 #ifdef RTE_LIBRTE_IEEE1588
889 static uint64_t ip_pkt_etqf_map[8] = {
890 0, 0, 0, PKT_RX_IEEE1588_PTP,
894 pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
895 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
896 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
898 pkt_flags = (hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
899 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
902 return pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF];
905 static inline uint64_t
906 rx_desc_status_to_pkt_flags(uint32_t rx_status)
911 * Check only whether a VLAN is present.
912 * Do not check whether the L3/L4 Rx checksum was done by the NIC;
913 * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
915 pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
917 #ifdef RTE_LIBRTE_IEEE1588
918 if (rx_status & IXGBE_RXD_STAT_TMST)
919 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
924 static inline uint64_t
925 rx_desc_error_to_pkt_flags(uint32_t rx_status)
928 * Bit 31: IPE, IPv4 checksum error
929 * Bit 30: L4I, L4 integrity error
931 static uint64_t error_to_pkt_flags_map[4] = {
932 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
933 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
935 return error_to_pkt_flags_map[(rx_status >>
936 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
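/*
 * Illustrative mapping derived from the table above: the two checksum error
 * bits form an index 0-3; index 3 (both IPE and L4I set) yields
 * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, while index 0 sets no error
 * flag at all.
 */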
939 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
941 * LOOK_AHEAD defines how many desc statuses to check beyond the
942 * current descriptor.
943 * It must be a pound define for optimal performance.
944 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
945 * function only works with LOOK_AHEAD=8.
948 #if (LOOK_AHEAD != 8)
949 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
952 ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
954 volatile union ixgbe_adv_rx_desc *rxdp;
955 struct igb_rx_entry *rxep;
959 int s[LOOK_AHEAD], nb_dd;
963 /* get references to current descriptor and S/W ring entry */
964 rxdp = &rxq->rx_ring[rxq->rx_tail];
965 rxep = &rxq->sw_ring[rxq->rx_tail];
967 /* check to make sure there is at least 1 packet to receive */
968 if (! (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
972 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
973 * reference packets that are ready to be received.
975 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
976 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
978 /* Read desc statuses backwards to avoid race condition */
979 for (j = LOOK_AHEAD-1; j >= 0; --j)
980 s[j] = rxdp[j].wb.upper.status_error;
982 /* Compute how many status bits were set */
984 for (j = 0; j < LOOK_AHEAD; ++j)
985 nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
989 /* Translate descriptor info to mbuf format */
990 for (j = 0; j < nb_dd; ++j) {
992 pkt_len = (uint16_t)(rxdp[j].wb.upper.length - rxq->crc_len);
993 mb->data_len = pkt_len;
994 mb->pkt_len = pkt_len;
996 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
998 /* convert descriptor fields to rte mbuf flags */
999 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(
1000 rxdp[j].wb.lower.lo_dword.data);
1001 /* reuse status field from scan list */
1002 pkt_flags |= rx_desc_status_to_pkt_flags(s[j]);
1003 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1004 mb->ol_flags = pkt_flags;
1006 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1007 mb->hash.rss = rxdp[j].wb.lower.hi_dword.rss;
1008 else if (pkt_flags & PKT_RX_FDIR) {
1009 mb->hash.fdir.hash =
1010 (uint16_t)((rxdp[j].wb.lower.hi_dword.csum_ip.csum)
1011 & IXGBE_ATR_HASH_MASK);
1012 mb->hash.fdir.id = rxdp[j].wb.lower.hi_dword.csum_ip.ip_id;
1016 /* Move mbuf pointers from the S/W ring to the stage */
1017 for (j = 0; j < LOOK_AHEAD; ++j) {
1018 rxq->rx_stage[i + j] = rxep[j].mbuf;
1021 /* stop if all requested packets could not be received */
1022 if (nb_dd != LOOK_AHEAD)
1026 /* clear software ring entries so we can cleanup correctly */
1027 for (i = 0; i < nb_rx; ++i) {
1028 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
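/*
 * Illustrative example (not from the original source): descriptors are
 * examined in groups of LOOK_AHEAD (8). If only the first 5 of a group have
 * their DD bit set (in practice they complete in order), those 5 completed
 * descriptors are turned into mbufs, staged for the application, and the
 * scan stops; the rest are left for the next poll.
 */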
1036 ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
1038 volatile union ixgbe_adv_rx_desc *rxdp;
1039 struct igb_rx_entry *rxep;
1040 struct rte_mbuf *mb;
1045 /* allocate buffers in bulk directly into the S/W ring */
1046 alloc_idx = (uint16_t)(rxq->rx_free_trigger -
1047 (rxq->rx_free_thresh - 1));
1048 rxep = &rxq->sw_ring[alloc_idx];
1049 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1050 rxq->rx_free_thresh);
1051 if (unlikely(diag != 0))
1054 rxdp = &rxq->rx_ring[alloc_idx];
1055 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1056 /* populate the static rte mbuf fields */
1058 rte_mbuf_refcnt_set(mb, 1);
1060 mb->data_off = RTE_PKTMBUF_HEADROOM;
1062 mb->port = rxq->port_id;
1064 /* populate the descriptors */
1065 dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
1066 rxdp[i].read.hdr_addr = dma_addr;
1067 rxdp[i].read.pkt_addr = dma_addr;
1070 /* update tail pointer */
1072 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);
1074 /* update state of internal queue structure */
1075 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
1076 rxq->rx_free_thresh);
1077 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1078 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
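/*
 * Example of the replenish bookkeeping above (illustrative values): with
 * rx_free_thresh = 32 and rx_free_trigger = 31, new mbufs are allocated for
 * descriptors 0..31, their buffer addresses are written into those
 * descriptors, RDT is written with 31 and the trigger advances to 63.
 */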
1084 static inline uint16_t
1085 ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1088 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1091 /* how many packets are ready to return? */
1092 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1094 /* copy mbuf pointers to the application's packet list */
1095 for (i = 0; i < nb_pkts; ++i)
1096 rx_pkts[i] = stage[i];
1098 /* update internal queue state */
1099 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1100 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1105 static inline uint16_t
1106 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1109 struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
1112 /* Any previously recv'd pkts will be returned from the Rx stage */
1113 if (rxq->rx_nb_avail)
1114 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1116 /* Scan the H/W ring for packets to receive */
1117 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1119 /* update internal queue state */
1120 rxq->rx_next_avail = 0;
1121 rxq->rx_nb_avail = nb_rx;
1122 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1124 /* if required, allocate new buffers to replenish descriptors */
1125 if (rxq->rx_tail > rxq->rx_free_trigger) {
1126 if (ixgbe_rx_alloc_bufs(rxq) != 0) {
1128 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1129 "queue_id=%u", (unsigned) rxq->port_id,
1130 (unsigned) rxq->queue_id);
1132 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1133 rxq->rx_free_thresh;
1136 * Need to rewind any previous receives if we cannot
1137 * allocate new buffers to replenish the old ones.
1139 rxq->rx_nb_avail = 0;
1140 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1141 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1142 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1148 if (rxq->rx_tail >= rxq->nb_rx_desc)
1151 /* received any packets this loop? */
1152 if (rxq->rx_nb_avail)
1153 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1158 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1160 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1165 if (unlikely(nb_pkts == 0))
1168 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1169 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1171 /* request is relatively large, chunk it up */
1175 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1176 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1177 nb_rx = (uint16_t)(nb_rx + ret);
1178 nb_pkts = (uint16_t)(nb_pkts - ret);
1185 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
1188 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1191 struct igb_rx_queue *rxq;
1192 volatile union ixgbe_adv_rx_desc *rx_ring;
1193 volatile union ixgbe_adv_rx_desc *rxdp;
1194 struct igb_rx_entry *sw_ring;
1195 struct igb_rx_entry *rxe;
1196 struct rte_mbuf *rxm;
1197 struct rte_mbuf *nmb;
1198 union ixgbe_adv_rx_desc rxd;
1201 uint32_t hlen_type_rss;
1211 rx_id = rxq->rx_tail;
1212 rx_ring = rxq->rx_ring;
1213 sw_ring = rxq->sw_ring;
1214 while (nb_rx < nb_pkts) {
1216 * The order of operations here is important as the DD status
1217 * bit must not be read after any other descriptor fields.
1218 * rx_ring and rxdp are pointing to volatile data so the order
1219 * of accesses cannot be reordered by the compiler. If they were
1220 * not volatile, they could be reordered which could lead to
1221 * using invalid descriptor fields when read from rxd.
1223 rxdp = &rx_ring[rx_id];
1224 staterr = rxdp->wb.upper.status_error;
1225 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1232 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1233 * is likely to be invalid and to be dropped by the various
1234 * validation checks performed by the network stack.
1236 * Allocate a new mbuf to replenish the RX ring descriptor.
1237 * If the allocation fails:
1238 * - arrange for that RX descriptor to be the first one
1239 * being parsed the next time the receive function is
1240 * invoked [on the same queue].
1242 * - Stop parsing the RX ring and return immediately.
1244 * This policy does not drop the packet received in the RX
1245 * descriptor for which the allocation of a new mbuf failed.
1246 * Thus, it allows that packet to be retrieved later if
1247 * mbufs have been freed in the meantime.
1248 * As a side effect, holding RX descriptors instead of
1249 * systematically giving them back to the NIC may lead to
1250 * RX ring exhaustion situations.
1251 * However, the NIC can gracefully prevent such situations
1252 * from happening by sending specific "back-pressure" flow control
1253 * frames to its peer(s).
1255 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1256 "ext_err_stat=0x%08x pkt_len=%u",
1257 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1258 (unsigned) rx_id, (unsigned) staterr,
1259 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1261 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1263 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1264 "queue_id=%u", (unsigned) rxq->port_id,
1265 (unsigned) rxq->queue_id);
1266 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1271 rxe = &sw_ring[rx_id];
1273 if (rx_id == rxq->nb_rx_desc)
1276 /* Prefetch next mbuf while processing current one. */
1277 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1280 * When next RX descriptor is on a cache-line boundary,
1281 * prefetch the next 4 RX descriptors and the next 8 pointers
1284 if ((rx_id & 0x3) == 0) {
1285 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1286 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1292 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1293 rxdp->read.hdr_addr = dma_addr;
1294 rxdp->read.pkt_addr = dma_addr;
1297 * Initialize the returned mbuf.
1298 * 1) setup generic mbuf fields:
1299 * - number of segments,
1302 * - RX port identifier.
1303 * 2) integrate hardware offload data, if any:
1304 * - RSS flag & hash,
1305 * - IP checksum flag,
1306 * - VLAN TCI, if any,
1309 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1311 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1312 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1315 rxm->pkt_len = pkt_len;
1316 rxm->data_len = pkt_len;
1317 rxm->port = rxq->port_id;
1319 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1320 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1321 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1323 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1324 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1325 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1326 rxm->ol_flags = pkt_flags;
1328 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1329 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
1330 else if (pkt_flags & PKT_RX_FDIR) {
1331 rxm->hash.fdir.hash =
1332 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1333 & IXGBE_ATR_HASH_MASK);
1334 rxm->hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
1337 * Store the mbuf address into the next entry of the array
1338 * of returned packets.
1340 rx_pkts[nb_rx++] = rxm;
1342 rxq->rx_tail = rx_id;
1345 * If the number of free RX descriptors is greater than the RX free
1346 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1348 * Update the RDT with the value of the last processed RX descriptor
1349 * minus 1, to guarantee that the RDT register is never equal to the
1350 * RDH register, which creates a "full" ring situation from the
1351 * hardware point of view...
1353 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1354 if (nb_hold > rxq->rx_free_thresh) {
1355 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1356 "nb_hold=%u nb_rx=%u",
1357 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1358 (unsigned) rx_id, (unsigned) nb_hold,
1360 rx_id = (uint16_t) ((rx_id == 0) ?
1361 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1362 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1365 rxq->nb_rx_hold = nb_hold;
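/*
 * Illustrative example of the RDT update policy above (values assumed): if
 * rx_free_thresh is 32 and 40 descriptors have been consumed since the last
 * write, RDT is set to the descriptor just before the next one to be polled
 * and the accumulated hold count starts again from zero.
 */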
1370 ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1373 struct igb_rx_queue *rxq;
1374 volatile union ixgbe_adv_rx_desc *rx_ring;
1375 volatile union ixgbe_adv_rx_desc *rxdp;
1376 struct igb_rx_entry *sw_ring;
1377 struct igb_rx_entry *rxe;
1378 struct rte_mbuf *first_seg;
1379 struct rte_mbuf *last_seg;
1380 struct rte_mbuf *rxm;
1381 struct rte_mbuf *nmb;
1382 union ixgbe_adv_rx_desc rxd;
1383 uint64_t dma; /* Physical address of mbuf data buffer */
1385 uint32_t hlen_type_rss;
1395 rx_id = rxq->rx_tail;
1396 rx_ring = rxq->rx_ring;
1397 sw_ring = rxq->sw_ring;
1400 * Retrieve RX context of current packet, if any.
1402 first_seg = rxq->pkt_first_seg;
1403 last_seg = rxq->pkt_last_seg;
1405 while (nb_rx < nb_pkts) {
1408 * The order of operations here is important as the DD status
1409 * bit must not be read after any other descriptor fields.
1410 * rx_ring and rxdp are pointing to volatile data so the order
1411 * of accesses cannot be reordered by the compiler. If they were
1412 * not volatile, they could be reordered which could lead to
1413 * using invalid descriptor fields when read from rxd.
1415 rxdp = &rx_ring[rx_id];
1416 staterr = rxdp->wb.upper.status_error;
1417 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1424 * Allocate a new mbuf to replenish the RX ring descriptor.
1425 * If the allocation fails:
1426 * - arrange for that RX descriptor to be the first one
1427 * being parsed the next time the receive function is
1428 * invoked [on the same queue].
1430 * - Stop parsing the RX ring and return immediately.
1432 * This policy does not drop the packet received in the RX
1433 * descriptor for which the allocation of a new mbuf failed.
1434 * Thus, it allows that packet to be retrieved later if
1435 * mbufs have been freed in the meantime.
1436 * As a side effect, holding RX descriptors instead of
1437 * systematically giving them back to the NIC may lead to
1438 * RX ring exhaustion situations.
1439 * However, the NIC can gracefully prevent such situations
1440 * from happening by sending specific "back-pressure" flow control
1441 * frames to its peer(s).
1443 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1444 "staterr=0x%x data_len=%u",
1445 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1446 (unsigned) rx_id, (unsigned) staterr,
1447 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1449 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1451 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1452 "queue_id=%u", (unsigned) rxq->port_id,
1453 (unsigned) rxq->queue_id);
1454 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1459 rxe = &sw_ring[rx_id];
1461 if (rx_id == rxq->nb_rx_desc)
1464 /* Prefetch next mbuf while processing current one. */
1465 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1468 * When next RX descriptor is on a cache-line boundary,
1469 * prefetch the next 4 RX descriptors and the next 8 pointers
1472 if ((rx_id & 0x3) == 0) {
1473 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1474 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1478 * Update RX descriptor with the physical address of the new
1479 * data buffer of the new allocated mbuf.
1483 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1484 rxdp->read.hdr_addr = dma;
1485 rxdp->read.pkt_addr = dma;
1488 * Set data length & data buffer address of mbuf.
1490 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1491 rxm->data_len = data_len;
1492 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1495 * If this is the first buffer of the received packet,
1496 * set the pointer to the first mbuf of the packet and
1497 * initialize its context.
1498 * Otherwise, update the total length and the number of segments
1499 * of the current scattered packet, and update the pointer to
1500 * the last mbuf of the current packet.
1502 if (first_seg == NULL) {
1504 first_seg->pkt_len = data_len;
1505 first_seg->nb_segs = 1;
1507 first_seg->pkt_len = (uint16_t)(first_seg->pkt_len
1509 first_seg->nb_segs++;
1510 last_seg->next = rxm;
1514 * If this is not the last buffer of the received packet,
1515 * update the pointer to the last mbuf of the current scattered
1516 * packet and continue to parse the RX ring.
1518 if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
1524 * This is the last buffer of the received packet.
1525 * If the CRC is not stripped by the hardware:
1526 * - Subtract the CRC length from the total packet length.
1527 * - If the last buffer only contains the whole CRC or a part
1528 * of it, free the mbuf associated to the last buffer.
1529 * If part of the CRC is also contained in the previous
1530 * mbuf, subtract the length of that CRC part from the
1531 * data length of the previous mbuf.
1534 if (unlikely(rxq->crc_len > 0)) {
1535 first_seg->pkt_len -= ETHER_CRC_LEN;
1536 if (data_len <= ETHER_CRC_LEN) {
1537 rte_pktmbuf_free_seg(rxm);
1538 first_seg->nb_segs--;
1539 last_seg->data_len = (uint16_t)
1540 (last_seg->data_len -
1541 (ETHER_CRC_LEN - data_len));
1542 last_seg->next = NULL;
1545 (uint16_t) (data_len - ETHER_CRC_LEN);
1549 * Initialize the first mbuf of the returned packet:
1550 * - RX port identifier,
1551 * - hardware offload data, if any:
1552 * - RSS flag & hash,
1553 * - IP checksum flag,
1554 * - VLAN TCI, if any,
1557 first_seg->port = rxq->port_id;
1560 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1561 * set in the pkt_flags field.
1563 first_seg->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1564 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1565 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1566 pkt_flags = (pkt_flags |
1567 rx_desc_status_to_pkt_flags(staterr));
1568 pkt_flags = (pkt_flags |
1569 rx_desc_error_to_pkt_flags(staterr));
1570 first_seg->ol_flags = pkt_flags;
1572 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1573 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1574 else if (pkt_flags & PKT_RX_FDIR) {
1575 first_seg->hash.fdir.hash =
1576 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1577 & IXGBE_ATR_HASH_MASK);
1578 first_seg->hash.fdir.id =
1579 rxd.wb.lower.hi_dword.csum_ip.ip_id;
1582 /* Prefetch data of first segment, if configured to do so. */
1583 rte_packet_prefetch((char *)first_seg->buf_addr +
1584 first_seg->data_off);
1587 * Store the mbuf address into the next entry of the array
1588 * of returned packets.
1590 rx_pkts[nb_rx++] = first_seg;
1593 * Setup receipt context for a new packet.
1599 * Record index of the next RX descriptor to probe.
1601 rxq->rx_tail = rx_id;
1604 * Save receive context.
1606 rxq->pkt_first_seg = first_seg;
1607 rxq->pkt_last_seg = last_seg;
1610 * If the number of free RX descriptors is greater than the RX free
1611 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1613 * Update the RDT with the value of the last processed RX descriptor
1614 * minus 1, to guarantee that the RDT register is never equal to the
1615 * RDH register, which creates a "full" ring situation from the
1616 * hardware point of view...
1618 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1619 if (nb_hold > rxq->rx_free_thresh) {
1620 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1621 "nb_hold=%u nb_rx=%u",
1622 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1623 (unsigned) rx_id, (unsigned) nb_hold,
1625 rx_id = (uint16_t) ((rx_id == 0) ?
1626 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1627 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1630 rxq->nb_rx_hold = nb_hold;
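/*
 * Illustrative example for the scattered path above: a frame larger than one
 * receive buffer arrives as two descriptors; the first mbuf ends up with
 * nb_segs = 2, pkt_len equal to the sum of the two data_len values (minus
 * ETHER_CRC_LEN when the hardware does not strip the CRC), and only the
 * first mbuf carries the offload flags, VLAN TCI and RSS hash.
 */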
1634 /*********************************************************************
1636 * Queue management functions
1638 **********************************************************************/
1641 * Rings setup and release.
1643 * TDBA/RDBA need only be aligned on a 16-byte boundary, but TDLEN/RDLEN must be
1644 * a multiple of 128 bytes, so we align TDBA/RDBA on a 128-byte boundary. This also
1645 * optimizes for the cache line size; the H/W supports cache line sizes up to 128 bytes.
1647 #define IXGBE_ALIGN 128
1650 * Maximum number of Ring Descriptors.
1652 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1653 * descriptors should meet the following condition:
1654 * (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1656 #define IXGBE_MIN_RING_DESC 32
1657 #define IXGBE_MAX_RING_DESC 4096
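/*
 * Illustrative note: since both the RX and TX advanced descriptors are
 * 16 bytes, the 128-byte rule above means the descriptor count must be a
 * multiple of 128 / 16 = 8 (e.g. 32, 64, ..., 4096).
 */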
1660 * Create memzone for HW rings. malloc can't be used as the physical address is
1661 * needed. If the memzone is already created, this function returns a pointer to it.
1664 static const struct rte_memzone *
1665 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1666 uint16_t queue_id, uint32_t ring_size, int socket_id)
1668 char z_name[RTE_MEMZONE_NAMESIZE];
1669 const struct rte_memzone *mz;
1671 snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1672 dev->driver->pci_drv.name, ring_name,
1673 dev->data->port_id, queue_id);
1675 mz = rte_memzone_lookup(z_name);
1679 #ifdef RTE_LIBRTE_XEN_DOM0
1680 return rte_memzone_reserve_bounded(z_name, ring_size,
1681 socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
1683 return rte_memzone_reserve_aligned(z_name, ring_size,
1684 socket_id, 0, IXGBE_ALIGN);
1689 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1693 if (txq->sw_ring != NULL) {
1694 for (i = 0; i < txq->nb_tx_desc; i++) {
1695 if (txq->sw_ring[i].mbuf != NULL) {
1696 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1697 txq->sw_ring[i].mbuf = NULL;
1704 ixgbe_tx_free_swring(struct igb_tx_queue *txq)
1707 txq->sw_ring != NULL)
1708 rte_free(txq->sw_ring);
1712 ixgbe_tx_queue_release(struct igb_tx_queue *txq)
1714 if (txq != NULL && txq->ops != NULL) {
1715 txq->ops->release_mbufs(txq);
1716 txq->ops->free_swring(txq);
1722 ixgbe_dev_tx_queue_release(void *txq)
1724 ixgbe_tx_queue_release(txq);
1727 /* (Re)set dynamic igb_tx_queue fields to defaults */
1729 ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
1731 static const union ixgbe_adv_tx_desc zeroed_desc = { .read = {
1733 struct igb_tx_entry *txe = txq->sw_ring;
1736 /* Zero out HW ring memory */
1737 for (i = 0; i < txq->nb_tx_desc; i++) {
1738 txq->tx_ring[i] = zeroed_desc;
1741 /* Initialize SW ring entries */
1742 prev = (uint16_t) (txq->nb_tx_desc - 1);
1743 for (i = 0; i < txq->nb_tx_desc; i++) {
1744 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1745 txd->wb.status = IXGBE_TXD_STAT_DD;
1748 txe[prev].next_id = i;
1752 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1753 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1756 txq->nb_tx_used = 0;
1758 * Always allow 1 descriptor to be un-allocated to avoid
1759 * a H/W race condition
1761 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1762 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1764 memset((void*)&txq->ctx_cache, 0,
1765 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1768 static struct ixgbe_txq_ops def_txq_ops = {
1769 .release_mbufs = ixgbe_tx_queue_release_mbufs,
1770 .free_swring = ixgbe_tx_free_swring,
1771 .reset = ixgbe_reset_tx_queue,
1774 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1775 * the queue parameters. Used in tx_queue_setup by primary process and then
1776 * in dev_init by secondary process when attaching to an existing ethdev.
1779 set_tx_function(struct rte_eth_dev *dev, struct igb_tx_queue *txq)
1781 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1782 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
1783 && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1784 PMD_INIT_LOG(INFO, "Using simple tx code path");
1785 #ifdef RTE_IXGBE_INC_VECTOR
1786 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
1787 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
1788 ixgbe_txq_vec_setup(txq) == 0)) {
1789 PMD_INIT_LOG(INFO, "Vector tx enabled.");
1790 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
1793 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1795 PMD_INIT_LOG(INFO, "Using full-featured tx code path");
1797 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
1798 (unsigned long)txq->txq_flags,
1799 (unsigned long)IXGBE_SIMPLE_FLAGS);
1801 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
1802 (unsigned long)txq->tx_rs_thresh,
1803 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
1804 dev->tx_pkt_burst = ixgbe_xmit_pkts;
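/*
 * Summary example (illustrative; assumes IXGBE_SIMPLE_FLAGS covers the usual
 * no-multi-segment / no-offload txq flags): a queue configured with those
 * flags and tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST takes the simple (or
 * vector) path above; any offload flag or a smaller tx_rs_thresh falls back
 * to the full-featured ixgbe_xmit_pkts.
 */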
1809 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1812 unsigned int socket_id,
1813 const struct rte_eth_txconf *tx_conf)
1815 const struct rte_memzone *tz;
1816 struct igb_tx_queue *txq;
1817 struct ixgbe_hw *hw;
1818 uint16_t tx_rs_thresh, tx_free_thresh;
1820 PMD_INIT_FUNC_TRACE();
1821 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1824 * Validate number of transmit descriptors.
1825 * It must not exceed the hardware maximum and must be a multiple of IXGBE_ALIGN.
1828 if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
1829 (nb_desc > IXGBE_MAX_RING_DESC) ||
1830 (nb_desc < IXGBE_MIN_RING_DESC)) {
1835 * The following two parameters control the setting of the RS bit on
1836 * transmit descriptors.
1837 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1838 * descriptors have been used.
1839 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1840 * descriptors are used or if the number of descriptors required
1841 * to transmit a packet is greater than the number of free TX descriptors.
1843 * The following constraints must be satisfied:
1844 * tx_rs_thresh must be greater than 0.
1845 * tx_rs_thresh must be less than the size of the ring minus 2.
1846 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1847 * tx_rs_thresh must be a divisor of the ring size.
1848 * tx_free_thresh must be greater than 0.
1849 * tx_free_thresh must be less than the size of the ring minus 3.
1850 * One descriptor in the TX ring is used as a sentinel to avoid a
1851 * H/W race condition, hence the maximum threshold constraints.
1852 * When set to zero use default values.
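 *
 * Illustrative example (values are assumptions, not requirements): nb_desc =
 * 512 with tx_rs_thresh = 32 and tx_free_thresh = 32 satisfies every
 * constraint above, since 32 > 0, 32 < 510, 32 <= 32, 32 < 509 and
 * 512 % 32 == 0.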
1854 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
1855 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
1856 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
1857 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
1858 if (tx_rs_thresh >= (nb_desc - 2)) {
1859 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
1860 "of TX descriptors minus 2. (tx_rs_thresh=%u "
1861 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
1862 (int)dev->data->port_id, (int)queue_idx);
1865 if (tx_free_thresh >= (nb_desc - 3)) {
1866 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
1867 "number of TX descriptors minus 3. "
1868 "(tx_free_thresh=%u "
1869 "port=%d queue=%d)",
1870 (unsigned int)tx_free_thresh,
1871 (int)dev->data->port_id, (int)queue_idx);
1874 if (tx_rs_thresh > tx_free_thresh) {
1875 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
1876 "tx_free_thresh. (tx_free_thresh=%u "
1877 "tx_rs_thresh=%u port=%d queue=%d)",
1878 (unsigned int)tx_free_thresh,
1879 (unsigned int)tx_rs_thresh,
1880 (int)dev->data->port_id,
1884 if ((nb_desc % tx_rs_thresh) != 0) {
1885 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
1886 "number of TX descriptors. (tx_rs_thresh=%u "
1887 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
1888 (int)dev->data->port_id, (int)queue_idx);
1893 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
1894 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
1895 * by the NIC and all descriptors are written back after the NIC
1896 * accumulates WTHRESH descriptors.
1898 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
1899 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
1900 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
1901 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
1902 (int)dev->data->port_id, (int)queue_idx);
1906 /* Free memory prior to re-allocation if needed... */
1907 if (dev->data->tx_queues[queue_idx] != NULL) {
1908 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
1909 dev->data->tx_queues[queue_idx] = NULL;
1912 /* First allocate the tx queue data structure */
1913 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct igb_tx_queue),
1914 RTE_CACHE_LINE_SIZE, socket_id);
1919 * Allocate TX ring hardware descriptors. A memzone large enough to
1920 * handle the maximum ring size is allocated in order to allow for
1921 * resizing in later calls to the queue setup function.
1923 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1924 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
1927 ixgbe_tx_queue_release(txq);
1931 txq->nb_tx_desc = nb_desc;
1932 txq->tx_rs_thresh = tx_rs_thresh;
1933 txq->tx_free_thresh = tx_free_thresh;
1934 txq->pthresh = tx_conf->tx_thresh.pthresh;
1935 txq->hthresh = tx_conf->tx_thresh.hthresh;
1936 txq->wthresh = tx_conf->tx_thresh.wthresh;
1937 txq->queue_id = queue_idx;
1938 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1939 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1940 txq->port_id = dev->data->port_id;
1941 txq->txq_flags = tx_conf->txq_flags;
1942 txq->ops = &def_txq_ops;
1943 txq->tx_deferred_start = tx_conf->tx_deferred_start;
1946 * Use VFTDT as the queue's tail register if a virtual function (VF) is detected.
1948 if (hw->mac.type == ixgbe_mac_82599_vf ||
1949 hw->mac.type == ixgbe_mac_X540_vf)
1950 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
1952 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
1953 #ifndef RTE_LIBRTE_XEN_DOM0
1954 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1956 txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
1958 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
1960 /* Allocate software ring */
1961 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
1962 sizeof(struct igb_tx_entry) * nb_desc,
1963 RTE_CACHE_LINE_SIZE, socket_id);
1964 if (txq->sw_ring == NULL) {
1965 ixgbe_tx_queue_release(txq);
1968 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1969 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1971 /* set up vector or scalar TX function as appropriate */
1972 set_tx_function(dev, txq);
1974 txq->ops->reset(txq);
1976 dev->data->tx_queues[queue_idx] = txq;
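/*
 * Hedged usage sketch (application side, not part of this driver): the
 * function names below are the standard ethdev calls of this DPDK
 * generation; the numeric values are examples only.
 *
 *	struct rte_eth_txconf txconf = { .tx_rs_thresh = 32,
 *					 .tx_free_thresh = 32 };
 *	if (rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *				   &txconf) != 0)
 *		rte_exit(EXIT_FAILURE, "TX queue setup failed\n");
 *	nb_sent = rte_eth_tx_burst(port_id, 0, pkts, nb_pkts);
 */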
1983 ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1987 if (rxq->sw_ring != NULL) {
1988 for (i = 0; i < rxq->nb_rx_desc; i++) {
1989 if (rxq->sw_ring[i].mbuf != NULL) {
1990 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1991 rxq->sw_ring[i].mbuf = NULL;
1994 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
1995 if (rxq->rx_nb_avail) {
1996 for (i = 0; i < rxq->rx_nb_avail; ++i) {
1997 struct rte_mbuf *mb;
1998 mb = rxq->rx_stage[rxq->rx_next_avail + i];
1999 rte_pktmbuf_free_seg(mb);
2001 rxq->rx_nb_avail = 0;
2008 ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
2011 ixgbe_rx_queue_release_mbufs(rxq);
2012 rte_free(rxq->sw_ring);
2018 ixgbe_dev_rx_queue_release(void *rxq)
2020 ixgbe_rx_queue_release(rxq);
2024 * Check if Rx Burst Bulk Alloc function can be used.
2026 * 0: the preconditions are satisfied and the bulk allocation function can be used.
2028 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2029 * function must be used.
2032 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2033 check_rx_burst_bulk_alloc_preconditions(struct igb_rx_queue *rxq)
2035 check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
2041 * Make sure the following pre-conditions are satisfied:
2042 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2043 * rxq->rx_free_thresh < rxq->nb_rx_desc
2044 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2045 * rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
2046 * Scattered packets are not supported. This should be checked
2047 * outside of this function.
2049 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2050 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2051 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2052 "rxq->rx_free_thresh=%d, "
2053 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2054 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2056 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2057 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2058 "rxq->rx_free_thresh=%d, "
2059 "rxq->nb_rx_desc=%d",
2060 rxq->rx_free_thresh, rxq->nb_rx_desc);
2062 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2063 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2064 "rxq->nb_rx_desc=%d, "
2065 "rxq->rx_free_thresh=%d",
2066 rxq->nb_rx_desc, rxq->rx_free_thresh);
2068 } else if (!(rxq->nb_rx_desc <
2069 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2070 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2071 "rxq->nb_rx_desc=%d, "
2072 "IXGBE_MAX_RING_DESC=%d, "
2073 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2074 rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2075 RTE_PMD_IXGBE_RX_MAX_BURST);
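/*
 * Illustrative configuration (example values only) that satisfies the
 * bulk-alloc preconditions above: nb_rx_desc = 128 with rx_free_thresh = 32,
 * since 32 >= RTE_PMD_IXGBE_RX_MAX_BURST (assumed to be 32), 32 < 128,
 * 128 % 32 == 0 and 128 < IXGBE_MAX_RING_DESC - 32.
 */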
2085 /* Reset dynamic igb_rx_queue fields back to defaults */
2087 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
2089 static const union ixgbe_adv_rx_desc zeroed_desc = { .read = {
2095 * By default, the Rx queue setup function allocates enough memory for
2096 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2097 * extra memory at the end of the descriptor ring to be zero'd out. A
2098 * pre-condition for using the Rx burst bulk alloc function is that the
2099 * number of descriptors is less than or equal to
2100 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2101 * constraints here to see if we need to zero out memory after the end
2102 * of the H/W descriptor ring.
2104 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2105 if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
2106 /* zero out extra memory */
2107 len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2110 /* do not zero out extra memory */
2111 len = rxq->nb_rx_desc;
2114 * Zero out HW ring memory. Zero out extra memory at the end of
2115 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2116 * reads extra memory as zeros.
2118 for (i = 0; i < len; i++) {
2119 rxq->rx_ring[i] = zeroed_desc;
2122 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2124 * initialize extra software ring entries. Space for these extra
2125 * entries is always allocated
2127 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2128 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
2129 rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
2132 rxq->rx_nb_avail = 0;
2133 rxq->rx_next_avail = 0;
2134 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2135 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
2137 rxq->nb_rx_hold = 0;
2138 rxq->pkt_first_seg = NULL;
2139 rxq->pkt_last_seg = NULL;
2143 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2146 unsigned int socket_id,
2147 const struct rte_eth_rxconf *rx_conf,
2148 struct rte_mempool *mp)
2150 const struct rte_memzone *rz;
2151 struct igb_rx_queue *rxq;
2152 struct ixgbe_hw *hw;
2153 int use_def_burst_func = 1;
2156 PMD_INIT_FUNC_TRACE();
2157 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2160 * Validate number of receive descriptors.
2161 * It must not exceed the hardware maximum, and the ring size in bytes must be a multiple of IXGBE_ALIGN.
2164 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
2165 (nb_desc > IXGBE_MAX_RING_DESC) ||
2166 (nb_desc < IXGBE_MIN_RING_DESC)) {
2170 /* Free memory prior to re-allocation if needed... */
2171 if (dev->data->rx_queues[queue_idx] != NULL) {
2172 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2173 dev->data->rx_queues[queue_idx] = NULL;
2176 /* First allocate the rx queue data structure */
2177 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct igb_rx_queue),
2178 RTE_CACHE_LINE_SIZE, socket_id);
2182 rxq->nb_rx_desc = nb_desc;
2183 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2184 rxq->queue_id = queue_idx;
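	/*
	 * When SR-IOV is active, the hardware ring index is offset by the
	 * first queue of the default pool; otherwise it maps 1:1 to queue_idx.
	 */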
2185 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2186 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2187 rxq->port_id = dev->data->port_id;
2188 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2190 rxq->drop_en = rx_conf->rx_drop_en;
2191 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2194 * Allocate RX ring hardware descriptors. A memzone large enough to
2195 * handle the maximum ring size is allocated in order to allow for
2196 * resizing in later calls to the queue setup function.
2198 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2199 RX_RING_SZ, socket_id);
2201 ixgbe_rx_queue_release(rxq);
2206 * Zero init all the descriptors in the ring.
2208 memset(rz->addr, 0, RX_RING_SZ);
2211 * Modified to setup VFRDT for Virtual Function
2213 if (hw->mac.type == ixgbe_mac_82599_vf ||
2214 hw->mac.type == ixgbe_mac_X540_vf) {
2216 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2218 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2222 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2224 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2226 #ifndef RTE_LIBRTE_XEN_DOM0
2227 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2229 rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2231 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2234 * Allocate software ring. Allow for space at the end of the
2235 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2236 * function does not access an invalid memory region.
2238 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2239 len = (uint16_t)(nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2243 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2244 sizeof(struct igb_rx_entry) * len,
2245 RTE_CACHE_LINE_SIZE, socket_id);
2246 if (rxq->sw_ring == NULL) {
2247 ixgbe_rx_queue_release(rxq);
2250 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2251 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
2254 * Certain constraints must be met in order to use the bulk buffer
2255 * allocation Rx burst function.
2257 use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
2259 #ifdef RTE_IXGBE_INC_VECTOR
2260 ixgbe_rxq_vec_setup(rxq);
2262 /* Check if pre-conditions are satisfied, and no Scattered Rx */
2263 if (!use_def_burst_func && !dev->data->scattered_rx) {
2264 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2265 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
2266 "satisfied. Rx Burst Bulk Alloc function will be "
2267 "used on port=%d, queue=%d.",
2268 rxq->port_id, rxq->queue_id);
2269 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
2270 #ifdef RTE_IXGBE_INC_VECTOR
2271 if (!ixgbe_rx_vec_condition_check(dev)) {
2272 PMD_INIT_LOG(INFO, "Vector rx enabled, please make "
2273 "sure the RX burst size is no less than 32.");
2274 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
2279 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
2280 "are not satisfied, Scattered Rx is requested, "
2281 "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
2282 "enabled (port=%d, queue=%d).",
2283 rxq->port_id, rxq->queue_id);
2285 dev->data->rx_queues[queue_idx] = rxq;
2287 ixgbe_reset_rx_queue(rxq);
2293 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2295 #define IXGBE_RXQ_SCAN_INTERVAL 4
2296 volatile union ixgbe_adv_rx_desc *rxdp;
2297 struct igb_rx_queue *rxq;
2300 if (rx_queue_id >= dev->data->nb_rx_queues) {
2301 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2305 rxq = dev->data->rx_queues[rx_queue_id];
2306 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
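	/*
	 * Scan the ring in steps of IXGBE_RXQ_SCAN_INTERVAL descriptors,
	 * wrapping around the end of the ring, until a descriptor without
	 * the DD bit set is found.
	 */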
2308 while ((desc < rxq->nb_rx_desc) &&
2309 (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD)) {
2310 desc += IXGBE_RXQ_SCAN_INTERVAL;
2311 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2312 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2313 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2314 desc - rxq->nb_rx_desc]);
2321 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2323 volatile union ixgbe_adv_rx_desc *rxdp;
2324 struct igb_rx_queue *rxq = rx_queue;
2327 if (unlikely(offset >= rxq->nb_rx_desc))
2329 desc = rxq->rx_tail + offset;
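	/* Wrap around if the computed index runs past the end of the ring. */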
2330 if (desc >= rxq->nb_rx_desc)
2331 desc -= rxq->nb_rx_desc;
2333 rxdp = &rxq->rx_ring[desc];
2334 return !!(rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD);
2338 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2342 PMD_INIT_FUNC_TRACE();
2344 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2345 struct igb_tx_queue *txq = dev->data->tx_queues[i];
2347 txq->ops->release_mbufs(txq);
2348 txq->ops->reset(txq);
2352 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2353 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
2355 ixgbe_rx_queue_release_mbufs(rxq);
2356 ixgbe_reset_rx_queue(rxq);
2361 /*********************************************************************
2363 * Device RX/TX init functions
2365 **********************************************************************/
2368 * Receive Side Scaling (RSS)
2369 * See section 7.1.2.8 in the following document:
2370 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2373 * The source and destination IP addresses of the IP header and the source
2374 * and destination ports of TCP/UDP headers, if any, of received packets are
2375 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2376 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2377 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2378 * RSS output index, which is used as the RX queue index where to store the received packets.
2380 * The following output is supplied in the RX write-back descriptor:
2381 * - 32-bit result of the Microsoft RSS hash function,
2382 * - 4-bit RSS type field.
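 *
 * For example, a hash result of 0x1234ABCD has 0x4D (77) in its seven LSBs,
 * so the packet is steered to the RX queue stored in RETA entry 77.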
2386 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2387 * Used as the default key.
2389 static uint8_t rss_intel_key[40] = {
2390 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2391 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2392 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2393 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2394 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2398 ixgbe_rss_disable(struct rte_eth_dev *dev)
2400 struct ixgbe_hw *hw;
2403 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2404 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2405 mrqc &= ~IXGBE_MRQC_RSSEN;
2406 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2410 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2418 hash_key = rss_conf->rss_key;
2419 if (hash_key != NULL) {
2420 /* Fill in RSS hash key */
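		/*
		 * The 40-byte key is written as ten 32-bit words, least-significant
		 * byte first, into the RSSRK register array.
		 */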
2421 for (i = 0; i < 10; i++) {
2422 rss_key = hash_key[(i * 4)];
2423 rss_key |= hash_key[(i * 4) + 1] << 8;
2424 rss_key |= hash_key[(i * 4) + 2] << 16;
2425 rss_key |= hash_key[(i * 4) + 3] << 24;
2426 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
2430 /* Set configured hashing protocols in MRQC register */
2431 rss_hf = rss_conf->rss_hf;
2432 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2433 if (rss_hf & ETH_RSS_IPV4)
2434 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2435 if (rss_hf & ETH_RSS_IPV4_TCP)
2436 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2437 if (rss_hf & ETH_RSS_IPV6)
2438 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2439 if (rss_hf & ETH_RSS_IPV6_EX)
2440 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2441 if (rss_hf & ETH_RSS_IPV6_TCP)
2442 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2443 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2444 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2445 if (rss_hf & ETH_RSS_IPV4_UDP)
2446 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2447 if (rss_hf & ETH_RSS_IPV6_UDP)
2448 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2449 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2450 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2451 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2455 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2456 struct rte_eth_rss_conf *rss_conf)
2458 struct ixgbe_hw *hw;
2462 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2465 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2466 * "RSS enabling cannot be done dynamically while it must be
2467 * preceded by a software reset"
2468 * Before changing anything, first check that the update RSS operation
2469 * does not attempt to disable RSS, if RSS was enabled at
2470 * initialization time, or does not attempt to enable RSS, if RSS was
2471 * disabled at initialization time.
2473 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2474 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2475 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2476 if (rss_hf != 0) /* Enable RSS */
2478 return 0; /* Nothing to do */
2481 if (rss_hf == 0) /* Disable RSS */
2483 ixgbe_hw_rss_hash_set(hw, rss_conf);
2488 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2489 struct rte_eth_rss_conf *rss_conf)
2491 struct ixgbe_hw *hw;
2498 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2499 hash_key = rss_conf->rss_key;
2500 if (hash_key != NULL) {
2501 /* Return RSS hash key */
2502 for (i = 0; i < 10; i++) {
2503 rss_key = IXGBE_READ_REG_ARRAY(hw, IXGBE_RSSRK(0), i);
2504 hash_key[(i * 4)] = rss_key & 0x000000FF;
2505 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2506 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2507 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2511 /* Get RSS functions configured in MRQC register */
2512 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2513 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
2514 rss_conf->rss_hf = 0;
2518 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
2519 rss_hf |= ETH_RSS_IPV4;
2520 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
2521 rss_hf |= ETH_RSS_IPV4_TCP;
2522 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
2523 rss_hf |= ETH_RSS_IPV6;
2524 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
2525 rss_hf |= ETH_RSS_IPV6_EX;
2526 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
2527 rss_hf |= ETH_RSS_IPV6_TCP;
2528 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
2529 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2530 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
2531 rss_hf |= ETH_RSS_IPV4_UDP;
2532 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
2533 rss_hf |= ETH_RSS_IPV6_UDP;
2534 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
2535 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2536 rss_conf->rss_hf = rss_hf;
2541 ixgbe_rss_configure(struct rte_eth_dev *dev)
2543 struct rte_eth_rss_conf rss_conf;
2544 struct ixgbe_hw *hw;
2549 PMD_INIT_FUNC_TRACE();
2550 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2553 * Fill in redirection table
2554 * The byte-swap is needed because NIC registers are in
2555 * little-endian order.
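 * Each 32-bit RETA register packs four one-byte entries, so a full register
 * is written once every four iterations; queue indices wrap around
 * nb_rx_queues.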
2558 for (i = 0, j = 0; i < 128; i++, j++) {
2559 if (j == dev->data->nb_rx_queues)
2561 reta = (reta << 8) | j;
2563 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2),
2568 * Configure the RSS key and the RSS protocols used to compute
2569 * the RSS hash of input packets.
2571 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2572 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
2573 ixgbe_rss_disable(dev);
2576 if (rss_conf.rss_key == NULL)
2577 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2578 ixgbe_hw_rss_hash_set(hw, &rss_conf);
2581 #define NUM_VFTA_REGISTERS 128
2582 #define NIC_RX_BUFFER_SIZE 0x200
2585 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2587 struct rte_eth_vmdq_dcb_conf *cfg;
2588 struct ixgbe_hw *hw;
2589 enum rte_eth_nb_pools num_pools;
2590 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2592 uint8_t nb_tcs; /* number of traffic classes */
2595 PMD_INIT_FUNC_TRACE();
2596 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2597 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2598 num_pools = cfg->nb_queue_pools;
2599 /* Check we have a valid number of pools */
2600 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2601 ixgbe_rss_disable(dev);
2604 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2605 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2609 * split rx buffer up into sections, each for 1 traffic class
2611 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
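	/*
	 * The per-TC section size is programmed into bits 19:10 of each
	 * RXPBSIZE register (1 KB units).
	 */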
2612 for (i = 0 ; i < nb_tcs; i++) {
2613 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2614 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2615 /* clear 10 bits. */
2616 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2617 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2619 /* zero alloc all unused TCs */
2620 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2621 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2622 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2623 /* clear 10 bits. */
2624 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2627 /* MRQC: enable vmdq and dcb */
2628 mrqc = (num_pools == ETH_16_POOLS) ?
2629 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
2630 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2632 /* PFVTCTL: turn on virtualisation and set the default pool */
2633 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2634 if (cfg->enable_default_pool) {
2635 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2637 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2640 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2642 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2644 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2646 * mapping is done with 3 bits per priority,
2647 * so shift by i*3 each time
2649 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2651 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2653 /* RTRPCS: DCB related */
2654 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2656 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2657 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2658 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable VLAN filters */
2659 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2661 /* VFTA - enable all vlan filters */
2662 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2663 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2666 /* VFRE: pool enabling for receive - 16 or 32 */
2667 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2668 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2671 * MPSAR - allow pools to read specific mac addresses
2672 * In this case, all pools should be able to read from mac addr 0
2674 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2675 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2677 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2678 for (i = 0; i < cfg->nb_pool_maps; i++) {
2679 /* set vlan id in VF register and set the valid bit */
2680 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2681 (cfg->pool_map[i].vlan_id & 0xFFF)));
2683 * Put the allowed pools in VFB reg. As we only have 16 or 32
2684 * pools, we only need to use the first half of the register
2687 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2692 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2693 * @hw: pointer to hardware structure
2694 * @dcb_config: pointer to ixgbe_dcb_config structure
2697 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2698 struct ixgbe_dcb_config *dcb_config)
2703 PMD_INIT_FUNC_TRACE();
2704 if (hw->mac.type != ixgbe_mac_82598EB) {
2705 /* Disable the Tx desc arbiter so that MTQC can be changed */
2706 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2707 reg |= IXGBE_RTTDCS_ARBDIS;
2708 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2710 /* Enable DCB for Tx with 8 TCs */
2711 if (dcb_config->num_tcs.pg_tcs == 8) {
2712 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2715 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2717 if (dcb_config->vt_mode)
2718 reg |= IXGBE_MTQC_VT_ENA;
2719 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2721 /* Disable drop for all queues */
2722 for (q = 0; q < 128; q++)
2723 IXGBE_WRITE_REG(hw, IXGBE_QDE,
2724 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2726 /* Enable the Tx desc arbiter */
2727 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2728 reg &= ~IXGBE_RTTDCS_ARBDIS;
2729 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2731 /* Enable Security TX Buffer IFG for DCB */
2732 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2733 reg |= IXGBE_SECTX_DCB;
2734 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2740 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2741 * @dev: pointer to rte_eth_dev structure
2742 * @dcb_config: pointer to ixgbe_dcb_config structure
2745 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2746 struct ixgbe_dcb_config *dcb_config)
2748 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2749 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2750 struct ixgbe_hw *hw =
2751 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2753 PMD_INIT_FUNC_TRACE();
2754 if (hw->mac.type != ixgbe_mac_82598EB)
2755 /*PF VF Transmit Enable*/
2756 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
2757 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2759 /* Configure general DCB TX parameters */
2760 ixgbe_dcb_tx_hw_config(hw, dcb_config);
2765 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2766 struct ixgbe_dcb_config *dcb_config)
2768 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
2769 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2770 struct ixgbe_dcb_tc_config *tc;
2773 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2774 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
2775 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2776 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2779 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2780 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2782 /* User Priority to Traffic Class mapping */
2783 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2784 j = vmdq_rx_conf->dcb_queue[i];
2785 tc = &dcb_config->tc_config[j];
2786 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2792 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
2793 struct ixgbe_dcb_config *dcb_config)
2795 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2796 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2797 struct ixgbe_dcb_tc_config *tc;
2800 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2801 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
2802 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2803 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2806 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2807 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2810 /* User Priority to Traffic Class mapping */
2811 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2812 j = vmdq_tx_conf->dcb_queue[i];
2813 tc = &dcb_config->tc_config[j];
2814 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2821 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
2822 struct ixgbe_dcb_config *dcb_config)
2824 struct rte_eth_dcb_rx_conf *rx_conf =
2825 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
2826 struct ixgbe_dcb_tc_config *tc;
2829 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
2830 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
2832 /* User Priority to Traffic Class mapping */
2833 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2834 j = rx_conf->dcb_queue[i];
2835 tc = &dcb_config->tc_config[j];
2836 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2842 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
2843 struct ixgbe_dcb_config *dcb_config)
2845 struct rte_eth_dcb_tx_conf *tx_conf =
2846 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
2847 struct ixgbe_dcb_tc_config *tc;
2850 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
2851 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
2853 /* User Priority to Traffic Class mapping */
2854 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2855 j = tx_conf->dcb_queue[i];
2856 tc = &dcb_config->tc_config[j];
2857 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2863 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
2864 * @hw: pointer to hardware structure
2865 * @dcb_config: pointer to ixgbe_dcb_config structure
2868 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
2869 struct ixgbe_dcb_config *dcb_config)
2875 PMD_INIT_FUNC_TRACE();
2877 * Disable the arbiter before changing parameters
2878 * (always enable recycle mode; WSP)
2880 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
2881 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2883 if (hw->mac.type != ixgbe_mac_82598EB) {
2884 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
2885 if (dcb_config->num_tcs.pg_tcs == 4) {
2886 if (dcb_config->vt_mode)
2887 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2888 IXGBE_MRQC_VMDQRT4TCEN;
2890 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2891 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2895 if (dcb_config->num_tcs.pg_tcs == 8) {
2896 if (dcb_config->vt_mode)
2897 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2898 IXGBE_MRQC_VMDQRT8TCEN;
2900 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2901 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2906 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
2909 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2910 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2911 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable VLAN filters */
2912 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2914 /* VFTA - enable all vlan filters */
2915 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2916 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2920 * Configure Rx packet plane (recycle mode; WSP) and re-enable the arbiter.
2923 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
2924 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2930 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
2931 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2933 switch (hw->mac.type) {
2934 case ixgbe_mac_82598EB:
2935 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
2937 case ixgbe_mac_82599EB:
2938 case ixgbe_mac_X540:
2939 case ixgbe_mac_X550:
2940 case ixgbe_mac_X550EM_x:
2941 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
2950 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
2951 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2953 switch (hw->mac.type) {
2954 case ixgbe_mac_82598EB:
2955 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
2956 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
2958 case ixgbe_mac_82599EB:
2959 case ixgbe_mac_X540:
2960 case ixgbe_mac_X550:
2961 case ixgbe_mac_X550EM_x:
2962 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
2963 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
2970 #define DCB_RX_CONFIG 1
2971 #define DCB_TX_CONFIG 1
2972 #define DCB_TX_PB 1024
2974 * ixgbe_dcb_hw_configure - Enable DCB and configure
2975 * general DCB in VT mode and non-VT mode parameters
2976 * @dev: pointer to rte_eth_dev structure
2977 * @dcb_config: pointer to ixgbe_dcb_config structure
2980 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
2981 struct ixgbe_dcb_config *dcb_config)
2984 uint8_t i, pfc_en, nb_tcs;
2986 uint8_t config_dcb_rx = 0;
2987 uint8_t config_dcb_tx = 0;
2988 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2989 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2990 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2991 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2992 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2993 struct ixgbe_dcb_tc_config *tc;
2994 uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2995 struct ixgbe_hw *hw =
2996 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2998 switch (dev->data->dev_conf.rxmode.mq_mode) {
2999 case ETH_MQ_RX_VMDQ_DCB:
3000 dcb_config->vt_mode = true;
3001 if (hw->mac.type != ixgbe_mac_82598EB) {
3002 config_dcb_rx = DCB_RX_CONFIG;
3004 * Get DCB and VT RX configuration parameters from rte_eth_conf
3007 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3008 /* Configure general VMDQ and DCB RX parameters */
3009 ixgbe_vmdq_dcb_configure(dev);
3013 dcb_config->vt_mode = false;
3014 config_dcb_rx = DCB_RX_CONFIG;
3015 /* Get DCB RX configuration parameters from rte_eth_conf */
3016 ixgbe_dcb_rx_config(dev, dcb_config);
3017 /* Configure general DCB RX parameters */
3018 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3021 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3024 switch (dev->data->dev_conf.txmode.mq_mode) {
3025 case ETH_MQ_TX_VMDQ_DCB:
3026 dcb_config->vt_mode = true;
3027 config_dcb_tx = DCB_TX_CONFIG;
3028 /* get DCB and VT TX configuration parameters from rte_eth_conf */
3029 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3030 /* Configure general VMDQ and DCB TX parameters */
3031 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3035 dcb_config->vt_mode = false;
3036 config_dcb_tx = DCB_TX_CONFIG;
3037 /* Get DCB TX configuration parameters from rte_eth_conf */
3038 ixgbe_dcb_tx_config(dev, dcb_config);
3039 /* Configure general DCB TX parameters */
3040 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3043 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3047 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3049 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3050 if (nb_tcs == ETH_4_TCS) {
3051 /* Avoid un-configured priority mapping to TC0 */
3053 uint8_t mask = 0xFF;
3054 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3055 mask = (uint8_t)(mask & (~ (1 << map[i])));
3056 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3057 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3061 /* Re-configure 4 TCs BW */
3062 for (i = 0; i < nb_tcs; i++) {
3063 tc = &dcb_config->tc_config[i];
3064 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3065 (uint8_t)(100 / nb_tcs);
3066 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3067 (uint8_t)(100 / nb_tcs);
3069 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3070 tc = &dcb_config->tc_config[i];
3071 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3072 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3077 /* Set RX buffer size */
3078 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3079 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3080 for (i = 0 ; i < nb_tcs; i++) {
3081 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3083 /* zero alloc all unused TCs */
3084 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3085 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3089 /* Only support an equally distributed Tx packet buffer strategy. */
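	/*
	 * Derive the per-TC threshold from the buffer size: divide by
	 * DCB_TX_PB (1024) and subtract IXGBE_TXPKT_SIZE_MAX to leave room
	 * for one maximum-size packet.
	 */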
3090 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3091 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3092 for (i = 0; i < nb_tcs; i++) {
3093 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3094 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3096 /* Clear unused TCs, if any, to zero buffer size*/
3097 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3098 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3099 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3103 /* Calculate traffic class credits */
3104 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3105 IXGBE_DCB_TX_CONFIG);
3106 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3107 IXGBE_DCB_RX_CONFIG);
3110 /* Unpack CEE standard containers */
3111 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3112 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3113 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3114 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3115 /* Configure PG(ETS) RX */
3116 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3120 /* Unpack CEE standard containers */
3121 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3122 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3123 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3124 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3125 /* Configure PG(ETS) TX */
3126 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3129 /* Configure queue statistics registers */
3130 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3132 /* Check if the PFC is supported */
3133 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3134 pbsize = (uint16_t) (NIC_RX_BUFFER_SIZE / nb_tcs);
3135 for (i = 0; i < nb_tcs; i++) {
3137 * With a TC count of 8, the default high_water is 48 and
3138 * the low_water is 16.
3140 hw->fc.high_water[i] = (pbsize * 3) / 4;
3141 hw->fc.low_water[i] = pbsize / 4;
3142 /* Enable pfc for this TC */
3143 tc = &dcb_config->tc_config[i];
3144 tc->pfc = ixgbe_dcb_pfc_enabled;
3146 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3147 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3149 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3156 * ixgbe_configure_dcb - Configure DCB Hardware
3157 * @dev: pointer to rte_eth_dev
3159 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3161 struct ixgbe_dcb_config *dcb_cfg =
3162 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3163 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3165 PMD_INIT_FUNC_TRACE();
3167 /* check support mq_mode for DCB */
3168 if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3169 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB))
3172 if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3175 /* Configure DCB hardware */
3176 ixgbe_dcb_hw_configure(dev, dcb_cfg);
3182 * VMDq is only supported on 10 GbE NICs.
3185 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3187 struct rte_eth_vmdq_rx_conf *cfg;
3188 struct ixgbe_hw *hw;
3189 enum rte_eth_nb_pools num_pools;
3190 uint32_t mrqc, vt_ctl, vlanctrl;
3194 PMD_INIT_FUNC_TRACE();
3195 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3196 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3197 num_pools = cfg->nb_queue_pools;
3199 ixgbe_rss_disable(dev);
3201 /* MRQC: enable vmdq */
3202 mrqc = IXGBE_MRQC_VMDQEN;
3203 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3205 /* PFVTCTL: turn on virtualisation and set the default pool */
3206 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3207 if (cfg->enable_default_pool)
3208 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3210 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3212 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3214 for (i = 0; i < (int)num_pools; i++) {
3215 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3216 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3219 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3220 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3221 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable VLAN filters */
3222 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3224 /* VFTA - enable all vlan filters */
3225 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3226 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3228 /* VFRE: pool enabling for receive - 64 */
3229 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3230 if (num_pools == ETH_64_POOLS)
3231 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3234 * MPSAR - allow pools to read specific mac addresses
3235 * In this case, all pools should be able to read from mac addr 0
3237 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3238 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3240 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3241 for (i = 0; i < cfg->nb_pool_maps; i++) {
3242 /* set vlan id in VF register and set the valid bit */
3243 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
3244 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3246 * Put the allowed pools in VFB reg. As we only have 16 or 64
3247 * pools, we only need to use the first half of the register
3250 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3251 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), \
3252 (cfg->pool_map[i].pools & UINT32_MAX));
3254 IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i*2+1)), \
3255 ((cfg->pool_map[i].pools >> 32) \
3260 /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3261 if (cfg->enable_loop_back) {
3262 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3263 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3264 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3267 IXGBE_WRITE_FLUSH(hw);
3271 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3272 * @hw: pointer to hardware structure
3275 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3280 PMD_INIT_FUNC_TRACE();
3281 /*PF VF Transmit Enable*/
3282 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3283 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3285 /* Disable the Tx desc arbiter so that MTQC can be changed */
3286 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3287 reg |= IXGBE_RTTDCS_ARBDIS;
3288 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3290 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3291 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3293 /* Disable drop for all queues */
3294 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3295 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3296 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3298 /* Enable the Tx desc arbiter */
3299 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3300 reg &= ~IXGBE_RTTDCS_ARBDIS;
3301 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3303 IXGBE_WRITE_FLUSH(hw);
3309 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
3311 struct igb_rx_entry *rxe = rxq->sw_ring;
3315 /* Initialize software ring entries */
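	/*
	 * Each descriptor is seeded with a freshly allocated mbuf; the same
	 * DMA address is programmed into both the header and packet address
	 * fields of the descriptor.
	 */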
3316 for (i = 0; i < rxq->nb_rx_desc; i++) {
3317 volatile union ixgbe_adv_rx_desc *rxd;
3318 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
3320 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3321 (unsigned) rxq->queue_id);
3325 rte_mbuf_refcnt_set(mbuf, 1);
3327 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3329 mbuf->port = rxq->port_id;
3332 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3333 rxd = &rxq->rx_ring[i];
3334 rxd->read.hdr_addr = dma_addr;
3335 rxd->read.pkt_addr = dma_addr;
3343 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3345 struct ixgbe_hw *hw =
3346 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3348 if (hw->mac.type == ixgbe_mac_82598EB)
3351 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3353 * SRIOV inactive scheme
3354 * any DCB/RSS w/o VMDq multi-queue setting
3356 switch (dev->data->dev_conf.rxmode.mq_mode) {
3358 ixgbe_rss_configure(dev);
3361 case ETH_MQ_RX_VMDQ_DCB:
3362 ixgbe_vmdq_dcb_configure(dev);
3365 case ETH_MQ_RX_VMDQ_ONLY:
3366 ixgbe_vmdq_rx_hw_configure(dev);
3369 case ETH_MQ_RX_NONE:
3370 /* if mq_mode is none, disable RSS mode. */
3371 default: ixgbe_rss_disable(dev);
3374 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3376 * SRIOV active scheme
3377 * FIXME: add support for DCB/RSS together with VMDq & SR-IOV
3380 IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
3384 IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
3388 IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
3391 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3399 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3401 struct ixgbe_hw *hw =
3402 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3406 if (hw->mac.type == ixgbe_mac_82598EB)
3409 /* disable arbiter before setting MTQC */
3410 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3411 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3412 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3414 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3416 * SRIOV inactive scheme
3417 * any DCB w/o VMDq multi-queue setting
3419 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3420 ixgbe_vmdq_tx_hw_configure(hw);
3422 mtqc = IXGBE_MTQC_64Q_1PB;
3423 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3426 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3429 * SRIOV active scheme
3430 * FIXME: add support for DCB together with VMDq & SR-IOV
3433 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3436 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
3439 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
3443 mtqc = IXGBE_MTQC_64Q_1PB;
3444 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3446 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
3449 /* re-enable arbiter */
3450 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3451 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3457 * Initializes Receive Unit.
3460 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
3462 struct ixgbe_hw *hw;
3463 struct igb_rx_queue *rxq;
3464 struct rte_pktmbuf_pool_private *mbp_priv;
3476 PMD_INIT_FUNC_TRACE();
3477 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3480 * Make sure receives are disabled while setting
3481 * up the RX context (registers, descriptor rings, etc.).
3483 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3484 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
3486 /* Enable receipt of broadcast frames */
3487 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
3488 fctrl |= IXGBE_FCTRL_BAM;
3489 fctrl |= IXGBE_FCTRL_DPF;
3490 fctrl |= IXGBE_FCTRL_PMCF;
3491 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
3494 * Configure CRC stripping, if any.
3496 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3497 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3498 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
3500 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
3503 * Configure jumbo frame support, if any.
3505 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
3506 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
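		/* The maximum frame size field occupies the upper 16 bits of MAXFRS. */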
3507 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
3508 maxfrs &= 0x0000FFFF;
3509 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
3510 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
3512 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
3515 * If loopback mode is configured for 82599, set LPBK bit.
3517 if (hw->mac.type == ixgbe_mac_82599EB &&
3518 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
3519 hlreg0 |= IXGBE_HLREG0_LPBK;
3521 hlreg0 &= ~IXGBE_HLREG0_LPBK;
3523 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3525 /* Setup RX queues */
3526 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3527 rxq = dev->data->rx_queues[i];
3530 * Reset crc_len in case it was changed after queue setup by a
3531 * call to configure.
3533 rxq->crc_len = (uint8_t)
3534 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
3537 /* Setup the Base and Length of the Rx Descriptor Rings */
3538 bus_addr = rxq->rx_ring_phys_addr;
3539 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
3540 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3541 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
3542 (uint32_t)(bus_addr >> 32));
3543 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
3544 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3545 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
3546 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
3548 /* Configure the SRRCTL register */
3549 #ifdef RTE_HEADER_SPLIT_ENABLE
3551 * Configure Header Split
3553 if (dev->data->dev_conf.rxmode.header_split) {
3554 if (hw->mac.type == ixgbe_mac_82599EB) {
3555 /* Must setup the PSRTYPE register */
3557 psrtype = IXGBE_PSRTYPE_TCPHDR |
3558 IXGBE_PSRTYPE_UDPHDR |
3559 IXGBE_PSRTYPE_IPV4HDR |
3560 IXGBE_PSRTYPE_IPV6HDR;
3561 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
3563 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3564 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3565 IXGBE_SRRCTL_BSIZEHDR_MASK);
3566 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3569 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3571 /* Set if packets are dropped when no descriptors available */
3573 srrctl |= IXGBE_SRRCTL_DROP_EN;
3576 * Configure the RX buffer size in the BSIZEPACKET field of
3577 * the SRRCTL register of the queue.
3578 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
3581 mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
3582 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3583 RTE_PKTMBUF_HEADROOM);
3584 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3585 IXGBE_SRRCTL_BSIZEPKT_MASK);
3586 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
3588 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3589 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3591 /* Add dual VLAN tag length to support dual VLAN frames */
3592 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
3593 2 * IXGBE_VLAN_TAG_SIZE) > buf_size){
3594 if (!dev->data->scattered_rx)
3595 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
3596 dev->data->scattered_rx = 1;
3597 #ifdef RTE_IXGBE_INC_VECTOR
3598 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3600 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3605 if (dev->data->dev_conf.rxmode.enable_scatter) {
3606 if (!dev->data->scattered_rx)
3607 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
3608 #ifdef RTE_IXGBE_INC_VECTOR
3609 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
3611 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3613 dev->data->scattered_rx = 1;
3617 * Device configured with multiple RX queues.
3619 ixgbe_dev_mq_rx_configure(dev);
3622 * Setup the Checksum Register.
3623 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
3624 * Enable IP/L4 checksum computation by hardware if requested to do so.
3626 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
3627 rxcsum |= IXGBE_RXCSUM_PCSD;
3628 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
3629 rxcsum |= IXGBE_RXCSUM_IPPCSE;
3631 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
3633 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
3635 if (hw->mac.type == ixgbe_mac_82599EB) {
3636 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3637 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3638 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3640 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
3641 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3642 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3649 * Initializes Transmit Unit.
3652 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
3654 struct ixgbe_hw *hw;
3655 struct igb_tx_queue *txq;
3661 PMD_INIT_FUNC_TRACE();
3662 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3664 /* Enable TX CRC (checksum offload requirement) and hw padding
3665 * (TSO requirement) */
3666 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3667 hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
3668 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3670 /* Setup the Base and Length of the Tx Descriptor Rings */
3671 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3672 txq = dev->data->tx_queues[i];
3674 bus_addr = txq->tx_ring_phys_addr;
3675 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
3676 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3677 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
3678 (uint32_t)(bus_addr >> 32));
3679 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
3680 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3681 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3682 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
3683 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
3686 * Disable Tx Head Writeback RO bit, since this hoses
3687 * bookkeeping if things aren't delivered in order.
3689 switch (hw->mac.type) {
3690 case ixgbe_mac_82598EB:
3691 txctrl = IXGBE_READ_REG(hw,
3692 IXGBE_DCA_TXCTRL(txq->reg_idx));
3693 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3694 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
3698 case ixgbe_mac_82599EB:
3699 case ixgbe_mac_X540:
3700 case ixgbe_mac_X550:
3701 case ixgbe_mac_X550EM_x:
3703 txctrl = IXGBE_READ_REG(hw,
3704 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
3705 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3706 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
3712 /* Device configured with multiple TX queues. */
3713 ixgbe_dev_mq_tx_configure(dev);
3717 * Set up link for 82599 loopback mode Tx->Rx.
3720 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
3722 PMD_INIT_FUNC_TRACE();
3724 if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
3725 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
3727 PMD_INIT_LOG(ERR, "Could not enable loopback mode");
3736 IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
3737 ixgbe_reset_pipeline_82599(hw);
3739 hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
3745 * Start Transmit and Receive Units.
3748 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
3750 struct ixgbe_hw *hw;
3751 struct igb_tx_queue *txq;
3752 struct igb_rx_queue *rxq;
3758 PMD_INIT_FUNC_TRACE();
3759 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3761 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3762 txq = dev->data->tx_queues[i];
3763 /* Setup Transmit Threshold Registers */
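		/* PTHRESH lives in bits 6:0, HTHRESH in bits 14:8 and WTHRESH in bits 22:16 of TXDCTL. */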
3764 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
3765 txdctl |= txq->pthresh & 0x7F;
3766 txdctl |= ((txq->hthresh & 0x7F) << 8);
3767 txdctl |= ((txq->wthresh & 0x7F) << 16);
3768 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
3771 if (hw->mac.type != ixgbe_mac_82598EB) {
3772 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3773 dmatxctl |= IXGBE_DMATXCTL_TE;
3774 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3777 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3778 txq = dev->data->tx_queues[i];
3779 if (!txq->tx_deferred_start)
3780 ixgbe_dev_tx_queue_start(dev, i);
3783 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3784 rxq = dev->data->rx_queues[i];
3785 if (!rxq->rx_deferred_start)
3786 ixgbe_dev_rx_queue_start(dev, i);
3789 /* Enable Receive engine */
3790 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3791 if (hw->mac.type == ixgbe_mac_82598EB)
3792 rxctrl |= IXGBE_RXCTRL_DMBYPS;
3793 rxctrl |= IXGBE_RXCTRL_RXEN;
3794 hw->mac.ops.enable_rx_dma(hw, rxctrl);
3796 /* If loopback mode is enabled for 82599, set up the link accordingly */
3797 if (hw->mac.type == ixgbe_mac_82599EB &&
3798 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
3799 ixgbe_setup_loopback_link_82599(hw);
3804 * Start Receive Units for specified queue.
3807 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3809 struct ixgbe_hw *hw;
3810 struct igb_rx_queue *rxq;
3814 PMD_INIT_FUNC_TRACE();
3815 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3817 if (rx_queue_id < dev->data->nb_rx_queues) {
3818 rxq = dev->data->rx_queues[rx_queue_id];
3820 /* Allocate buffers for descriptor rings */
3821 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
3822 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
3826 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
3827 rxdctl |= IXGBE_RXDCTL_ENABLE;
3828 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
3830 /* Wait until RX Enable ready */
3831 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
3834 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
3835 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3837 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
3840 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
3841 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
3849 * Stop Receive Units for specified queue.
3852 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3854 struct ixgbe_hw *hw;
3855 struct igb_rx_queue *rxq;
3859 PMD_INIT_FUNC_TRACE();
3860 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3862 if (rx_queue_id < dev->data->nb_rx_queues) {
3863 rxq = dev->data->rx_queues[rx_queue_id];
3865 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
3866 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
3867 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
3869 /* Wait until RX Enable ready */
3870 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
3873 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
3874 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
3876 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
3879 rte_delay_us(RTE_IXGBE_WAIT_100_US);
3881 ixgbe_rx_queue_release_mbufs(rxq);
3882 ixgbe_reset_rx_queue(rxq);
3891 * Start Transmit Units for specified queue.
3894 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
3896 struct ixgbe_hw *hw;
3897 struct igb_tx_queue *txq;
3901 PMD_INIT_FUNC_TRACE();
3902 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3904 if (tx_queue_id < dev->data->nb_tx_queues) {
3905 txq = dev->data->tx_queues[tx_queue_id];
3906 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
3907 txdctl |= IXGBE_TXDCTL_ENABLE;
3908 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
3910 /* Wait until TX Enable ready */
3911 if (hw->mac.type == ixgbe_mac_82599EB) {
3912 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
3915 txdctl = IXGBE_READ_REG(hw,
3916 IXGBE_TXDCTL(txq->reg_idx));
3917 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3919 PMD_INIT_LOG(ERR, "Could not enable "
3920 "Tx Queue %d", tx_queue_id);
3923 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
3924 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
3932 * Stop Transmit Units for specified queue.
3935 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
3937 struct ixgbe_hw *hw;
3938 struct igb_tx_queue *txq;
3940 uint32_t txtdh, txtdt;
3943 PMD_INIT_FUNC_TRACE();
3944 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3946 if (tx_queue_id < dev->data->nb_tx_queues) {
3947 txq = dev->data->tx_queues[tx_queue_id];
3949 /* Wait until TX queue is empty */
3950 if (hw->mac.type == ixgbe_mac_82599EB) {
3951 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
3953 rte_delay_us(RTE_IXGBE_WAIT_100_US);
3954 txtdh = IXGBE_READ_REG(hw,
3955 IXGBE_TDH(txq->reg_idx));
3956 txtdt = IXGBE_READ_REG(hw,
3957 IXGBE_TDT(txq->reg_idx));
3958 } while (--poll_ms && (txtdh != txtdt));
3960 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
3961 "when stopping.", tx_queue_id);
3964 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
3965 txdctl &= ~IXGBE_TXDCTL_ENABLE;
3966 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
3968 /* Wait until TX Enable ready */
3969 if (hw->mac.type == ixgbe_mac_82599EB) {
3970 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
3973 txdctl = IXGBE_READ_REG(hw,
3974 IXGBE_TXDCTL(txq->reg_idx));
3975 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
3977 PMD_INIT_LOG(ERR, "Could not disable "
3978 "Tx Queue %d", tx_queue_id);
3981 if (txq->ops != NULL) {
3982 txq->ops->release_mbufs(txq);
3983 txq->ops->reset(txq);
3992 * [VF] Initializes Receive Unit.
3995 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
3997 struct ixgbe_hw *hw;
3998 struct igb_rx_queue *rxq;
3999 struct rte_pktmbuf_pool_private *mbp_priv;
4006 PMD_INIT_FUNC_TRACE();
4007 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4010 * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
4011 * disables VF packet reception if the PF MTU is > 1500.
4012 * This is done to deal with a limitation of the 82599 that forces
4013 * the PF and all VFs to share the same MTU.
4014 * The PF driver then re-enables VF packet reception when the VF driver
4015 * issues an IXGBE_VF_SET_LPE request.
4016 * In the meantime, the VF device cannot be used, even if the VF driver
4017 * and the Guest VM network stack are ready to accept packets with a
4018 * size up to the PF MTU.
4019 * As a work-around to this PF behaviour, force the call to
4020 * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
4021 * VF packet reception works in all cases.
4023 ixgbevf_rlpml_set_vf(hw,
4024 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
4026 /* Setup RX queues */
4027 dev->rx_pkt_burst = ixgbe_recv_pkts;
4028 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4029 rxq = dev->data->rx_queues[i];
4031 /* Allocate buffers for descriptor rings */
4032 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
4036 /* Setup the Base and Length of the Rx Descriptor Rings */
4037 bus_addr = rxq->rx_ring_phys_addr;
4039 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
4040 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4041 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
4042 (uint32_t)(bus_addr >> 32));
4043 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
4044 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4045 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
4046 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
4049 /* Configure the SRRCTL register */
4050 #ifdef RTE_HEADER_SPLIT_ENABLE
4052 * Configure Header Split
4054 if (dev->data->dev_conf.rxmode.header_split) {
4056 /* Must setup the PSRTYPE register */
4058 psrtype = IXGBE_PSRTYPE_TCPHDR |
4059 IXGBE_PSRTYPE_UDPHDR |
4060 IXGBE_PSRTYPE_IPV4HDR |
4061 IXGBE_PSRTYPE_IPV6HDR;
4063 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
4065 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
4066 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4067 IXGBE_SRRCTL_BSIZEHDR_MASK);
4068 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4071 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4073 /* Set if packets are dropped when no descriptors available */
4075 srrctl |= IXGBE_SRRCTL_DROP_EN;
4078 * Configure the RX buffer size in the BSIZEPACKET field of
4079 * the SRRCTL register of the queue.
4080 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
4083 mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
4084 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
4085 RTE_PKTMBUF_HEADROOM);
4086 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4087 IXGBE_SRRCTL_BSIZEPKT_MASK);
4090 * VF modification to write virtual function SRRCTL register
4092 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
4094 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4095 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4097 /* Add dual VLAN tag length to support dual VLAN frames */
4098 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
4099 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
4100 if (!dev->data->scattered_rx)
4101 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4102 dev->data->scattered_rx = 1;
4103 #ifdef RTE_IXGBE_INC_VECTOR
4104 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4106 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
4111 if (dev->data->dev_conf.rxmode.enable_scatter) {
4112 if (!dev->data->scattered_rx)
4113 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
4114 #ifdef RTE_IXGBE_INC_VECTOR
4115 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4117 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
4119 dev->data->scattered_rx = 1;
4126 * [VF] Initializes Transmit Unit.
4129 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
4131 struct ixgbe_hw *hw;
4132 struct igb_tx_queue *txq;
4137 PMD_INIT_FUNC_TRACE();
4138 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4140 /* Setup the Base and Length of the Tx Descriptor Rings */
4141 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4142 txq = dev->data->tx_queues[i];
4143 bus_addr = txq->tx_ring_phys_addr;
4144 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
4145 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4146 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
4147 (uint32_t)(bus_addr >> 32));
4148 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
4149 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4150 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4151 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
4152 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
4155 * Disable Tx Head Writeback RO bit, since this hoses
4156 * bookkeeping if things aren't delivered in order.
4158 txctrl = IXGBE_READ_REG(hw,
4159 IXGBE_VFDCA_TXCTRL(i));
4160 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4161 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
4167 * [VF] Start Transmit and Receive Units.
4170 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
4172 struct ixgbe_hw *hw;
4173 struct igb_tx_queue *txq;
4174 struct igb_rx_queue *rxq;
4180 PMD_INIT_FUNC_TRACE();
4181 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4183 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4184 txq = dev->data->tx_queues[i];
4185 /* Setup Transmit Threshold Registers */
4186 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4187 txdctl |= txq->pthresh & 0x7F;
4188 txdctl |= ((txq->hthresh & 0x7F) << 8);
4189 txdctl |= ((txq->wthresh & 0x7F) << 16);
4190 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4193 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4195 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4196 txdctl |= IXGBE_TXDCTL_ENABLE;
4197 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
4200 /* Wait until TX Enable ready */
4203 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
4204 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4206 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
4208 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4210 rxq = dev->data->rx_queues[i];
4212 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4213 rxdctl |= IXGBE_RXDCTL_ENABLE;
4214 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
4216 /* Wait until RX Enable ready */
4220 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
4221 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4223 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
4225 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);