/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "ixgbe_logs.h"
#include "ixgbe/ixgbe_api.h"
#include "ixgbe/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "ixgbe/ixgbe_dcb.h"
#define RTE_PMD_IXGBE_TX_MAX_BURST 32

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#endif

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
	return (m);
}

#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
	(char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
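/*
 * Note: the two macros above compute the bus address the NIC must DMA
 * to/from: buf_physaddr plus the offset of the packet data within the
 * mbuf buffer (the current pkt.data position, or the fixed headroom for
 * a freshly allocated mbuf).
 */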
/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
	uint16_t next_id; /**< Index of next descriptor in ring. */
	uint16_t last_id; /**< Index of last scattered descriptor. */
};
/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
	struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
	struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
	uint16_t            rx_tail;  /**< current value of RDT register. */
	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	uint16_t            rx_nb_avail; /**< nr of staged pkts ready to ret to app */
	uint16_t            rx_next_avail; /**< idx of next staged pkt to ret to app */
	uint16_t            rx_free_trigger; /**< triggers rx buffer allocation */
#endif
	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
	uint16_t            queue_id; /**< RX queue index. */
	uint8_t             port_id;  /**< Device port identifier. */
	uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	/** need to alloc dummy mbuf, for wraparound when scanning hw ring */
	struct rte_mbuf fake_mbuf;
	/** hold packets to return to application */
	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
#endif
};
/**
 * IXGBE CTX Constants
 */
enum ixgbe_advctx_num {
	IXGBE_CTX_0    = 0, /**< CTX0 */
	IXGBE_CTX_1    = 1, /**< CTX1 */
	IXGBE_CTX_NUM  = 2, /**< CTX NUMBER */
};

/**
 * Structure to check if a new context needs to be built.
 */
struct ixgbe_advctx_info {
	uint16_t flags;           /**< ol_flags for context build. */
	uint32_t cmp_mask;        /**< compare mask for vlan_macip_lens */
	union rte_vlan_macip vlan_macip_lens; /**< vlan, mac ip length. */
};
/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
	/** TX ring virtual address. */
	volatile union ixgbe_adv_tx_desc *tx_ring;
	uint64_t            tx_ring_phys_addr; /**< TX ring DMA address. */
	struct igb_tx_entry *sw_ring;      /**< virtual address of SW ring. */
	volatile uint32_t   *tdt_reg_addr; /**< Address of TDT register. */
	uint16_t            nb_tx_desc;    /**< number of TX descriptors. */
	uint16_t            tx_tail;       /**< current value of TDT reg. */
	uint16_t            tx_free_thresh;/**< minimum TX before freeing. */
	/** Number of TX descriptors to use before RS bit is set. */
	uint16_t            tx_rs_thresh;
	/** Number of TX descriptors used since RS bit was set. */
	uint16_t            nb_tx_used;
	/** Index to last TX descriptor to have been cleaned. */
	uint16_t            last_desc_cleaned;
	/** Total number of TX descriptors ready to be allocated. */
	uint16_t            nb_tx_free;
	uint16_t            tx_next_dd;    /**< next desc to scan for DD bit */
	uint16_t            tx_next_rs;    /**< next desc to set RS bit */
	uint16_t            queue_id;      /**< TX queue index. */
	uint8_t             port_id;       /**< Device port identifier. */
	uint8_t             pthresh;       /**< Prefetch threshold register. */
	uint8_t             hthresh;       /**< Host threshold register. */
	uint8_t             wthresh;       /**< Write-back threshold reg. */
	uint32_t            txq_flags;     /**< Holds flags for this TXq */
	uint32_t            ctx_curr;      /**< Hardware context states. */
	/** Hardware context0 history. */
	struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
};
#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while(0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while(0)
#endif
/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/
/*
 * The "simple" TX queue functions require that the following
 * flags are set when the TX queue is configured:
 *  - ETH_TXQ_FLAGS_NOMULTSEGS
 *  - ETH_TXQ_FLAGS_NOVLANOFFL
 *  - ETH_TXQ_FLAGS_NOXSUMSCTP
 *  - ETH_TXQ_FLAGS_NOXSUMUDP
 *  - ETH_TXQ_FLAGS_NOXSUMTCP
 * and that the RS bit threshold (tx_rs_thresh) is at least equal to
 * RTE_PMD_IXGBE_TX_MAX_BURST.
 */
#define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
			    ETH_TXQ_FLAGS_NOOFFLOADS)
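/*
 * Note (assumption based on the ethdev flag definitions of this DPDK
 * release): ETH_TXQ_FLAGS_NOOFFLOADS groups the NOVLANOFFL and NOXSUM*
 * bits listed above, so checking
 * (txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS is enough to
 * decide that the simple TX path may be used.
 */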
247 * Check for descriptors with their DD bit set and free mbufs.
248 * Return the total number of buffers freed.
251 ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
253 struct igb_tx_entry *txep;
257 /* check DD bit on threshold descriptor */
258 status = txq->tx_ring[txq->tx_next_dd].wb.status;
259 if (! (status & IXGBE_ADVTXD_STAT_DD))
263 * first buffer to free from S/W ring is at index
264 * tx_next_dd - (tx_rs_thresh-1)
266 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
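	/*
	 * Example (illustrative): with tx_rs_thresh = 32 and tx_next_dd = 31,
	 * the statement above points at sw_ring[0], so the 32 entries at
	 * indexes 0..31 are the ones freed below.
	 */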
268 /* prefetch the mbufs that are about to be freed */
269 for (i = 0; i < txq->tx_rs_thresh; ++i)
270 rte_prefetch0((txep + i)->mbuf);
272 /* free buffers one at a time */
273 if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
274 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
275 rte_mempool_put(txep->mbuf->pool, txep->mbuf);
279 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
280 rte_pktmbuf_free_seg(txep->mbuf);
285 /* buffers were freed, update counters */
286 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
287 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
288 if (txq->tx_next_dd >= txq->nb_tx_desc)
289 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
291 return txq->tx_rs_thresh;
295 * Populate descriptors with the following info:
296 * 1.) buffer_addr = phys_addr + headroom
297 * 2.) cmd_type_len = DCMD_DTYP_FLAGS | pkt_len
298 * 3.) olinfo_status = pkt_len << PAYLEN_SHIFT
301 /* Defines for Tx descriptor */
302 #define DCMD_DTYP_FLAGS (IXGBE_ADVTXD_DTYP_DATA |\
303 IXGBE_ADVTXD_DCMD_IFCS |\
304 IXGBE_ADVTXD_DCMD_DEXT |\
305 IXGBE_ADVTXD_DCMD_EOP)
307 /* Populate 4 descriptors with data from 4 mbufs */
309 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
311 uint64_t buf_dma_addr;
315 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
316 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
317 pkt_len = (*pkts)->pkt.data_len;
319 /* write data to descriptor */
320 txdp->read.buffer_addr = buf_dma_addr;
321 txdp->read.cmd_type_len =
322 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
323 txdp->read.olinfo_status =
324 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
328 /* Populate 1 descriptor with data from 1 mbuf */
330 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
332 uint64_t buf_dma_addr;
335 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
336 pkt_len = (*pkts)->pkt.data_len;
338 /* write data to descriptor */
339 txdp->read.buffer_addr = buf_dma_addr;
340 txdp->read.cmd_type_len =
341 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
342 txdp->read.olinfo_status =
343 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
347 * Fill H/W descriptor ring with mbuf data.
348 * Copy mbuf pointers to the S/W ring.
351 ixgbe_tx_fill_hw_ring(struct igb_tx_queue *txq, struct rte_mbuf **pkts,
354 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
355 struct igb_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
356 const int N_PER_LOOP = 4;
357 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
358 int mainpart, leftover;
362 * Process most of the packets in chunks of N pkts. Any
363 * leftover packets will get processed one at a time.
365 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
366 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
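	/*
	 * Example (illustrative): nb_pkts = 7 gives mainpart = 4 and
	 * leftover = 3, i.e. one tx4() call followed by three tx1() calls.
	 */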
367 for (i = 0; i < mainpart; i += N_PER_LOOP) {
368 /* Copy N mbuf pointers to the S/W ring */
369 for (j = 0; j < N_PER_LOOP; ++j) {
370 (txep + i + j)->mbuf = *(pkts + i + j);
372 tx4(txdp + i, pkts + i);
375 if (unlikely(leftover > 0)) {
376 for (i = 0; i < leftover; ++i) {
377 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
378 tx1(txdp + mainpart + i, pkts + mainpart + i);
383 static inline uint16_t
384 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
387 struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
388 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
392 * Begin scanning the H/W ring for done descriptors when the
393 * number of available descriptors drops below tx_free_thresh. For
394 * each done descriptor, free the associated buffer.
396 if (txq->nb_tx_free < txq->tx_free_thresh)
397 ixgbe_tx_free_bufs(txq);
399 /* Only use descriptors that are available */
400 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
401 if (unlikely(nb_pkts == 0))
404 /* Use exactly nb_pkts descriptors */
405 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
408 * At this point, we know there are enough descriptors in the
409 * ring to transmit all the packets. This assumes that each
410 * mbuf contains a single segment, and that no new offloads
411 * are expected, which would require a new context descriptor.
415 * See if we're going to wrap-around. If so, handle the top
416 * of the descriptor ring first, then do the bottom. If not,
417 * the processing looks just like the "bottom" part anyway...
419 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
420 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
421 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
424 * We know that the last descriptor in the ring will need to
425 * have its RS bit set because tx_rs_thresh has to be
426 * a divisor of the ring size
428 tx_r[txq->tx_next_rs].read.cmd_type_len |=
429 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
430 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
435 /* Fill H/W descriptor ring with mbuf data */
436 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
437 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
440 * Determine if RS bit should be set
441 * This is what we actually want:
442 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
443 * but instead of subtracting 1 and doing >=, we can just do
444 * greater than without subtracting.
446 if (txq->tx_tail > txq->tx_next_rs) {
447 tx_r[txq->tx_next_rs].read.cmd_type_len |=
448 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
449 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
451 if (txq->tx_next_rs >= txq->nb_tx_desc)
452 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
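		/*
		 * Example (illustrative): with tx_rs_thresh = 32, the RS bit
		 * is requested on descriptors 31, 63, 95, ..., so the NIC
		 * writes back completion status roughly once every 32
		 * descriptors rather than once per packet.
		 */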
456 * Check for wrap-around. This would only happen if we used
457 * up to the last descriptor in the ring, no more, no less.
459 if (txq->tx_tail >= txq->nb_tx_desc)
462 /* update tail pointer */
464 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
470 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
475 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
476 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
477 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
479 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
483 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
484 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
485 nb_tx = (uint16_t)(nb_tx + ret);
486 nb_pkts = (uint16_t)(nb_pkts - ret);
495 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
496 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
497 uint16_t ol_flags, uint32_t vlan_macip_lens)
499 uint32_t type_tucmd_mlhl;
500 uint32_t mss_l4len_idx;
504 ctx_idx = txq->ctx_curr;
508 if (ol_flags & PKT_TX_VLAN_PKT) {
509 cmp_mask |= TX_VLAN_CMP_MASK;
512 if (ol_flags & PKT_TX_IP_CKSUM) {
513 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
514 cmp_mask |= TX_MAC_LEN_CMP_MASK;
517 /* Specify which HW CTX to upload. */
518 mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
519 switch (ol_flags & PKT_TX_L4_MASK) {
520 case PKT_TX_UDP_CKSUM:
521 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
522 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
523 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
524 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
526 case PKT_TX_TCP_CKSUM:
527 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
528 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
529 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
530 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
532 case PKT_TX_SCTP_CKSUM:
533 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
534 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
535 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
536 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
539 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
540 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
544 txq->ctx_cache[ctx_idx].flags = ol_flags;
545 txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
546 txq->ctx_cache[ctx_idx].vlan_macip_lens.data =
547 vlan_macip_lens & cmp_mask;
549 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
550 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
551 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
552 ctx_txd->seqnum_seed = 0;
556 * Check which hardware context can be used. Use the existing match
557 * or create a new context descriptor.
559 static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
		uint32_t vlan_macip_lens)
{
	/* If the flags match the context currently in use, reuse it. */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* Otherwise, check whether the other hardware context matches. */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* Neither cached context matches: a new context descriptor is needed. */
	return (IXGBE_CTX_NUM);
}
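/*
 * Callers treat the IXGBE_CTX_NUM return value as "no cached context
 * matched": they account for one extra descriptor (new_ctx) and program
 * a fresh context via ixgbe_set_xmit_ctx().
 */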
582 static inline uint32_t
583 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
585 static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
586 static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
589 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
590 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
594 static inline uint32_t
595 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
597 static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
598 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
609 /* Reset transmit descriptors after they have been used */
611 ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
613 struct igb_tx_entry *sw_ring = txq->sw_ring;
614 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
615 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
616 uint16_t nb_tx_desc = txq->nb_tx_desc;
617 uint16_t desc_to_clean_to;
618 uint16_t nb_tx_to_clean;
620 /* Determine the last descriptor needing to be cleaned */
621 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
622 if (desc_to_clean_to >= nb_tx_desc)
623 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
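	/*
	 * Example (illustrative): last_desc_cleaned = 500, tx_rs_thresh = 32
	 * and nb_tx_desc = 512 give 532, which wraps to descriptor 20.
	 */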
625 /* Check to make sure the last descriptor to clean is done */
626 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
627 if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD))
629 PMD_TX_FREE_LOG(DEBUG,
630 "TX descriptor %4u is not done"
631 "(port=%d queue=%d)",
633 txq->port_id, txq->queue_id);
634 /* Failed to clean any descriptors, better luck next time */
638 /* Figure out how many descriptors will be cleaned */
639 if (last_desc_cleaned > desc_to_clean_to)
640 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
643 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
646 PMD_TX_FREE_LOG(DEBUG,
647 "Cleaning %4u TX descriptors: %4u to %4u "
648 "(port=%d queue=%d)",
649 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
650 txq->port_id, txq->queue_id);
653 * The last descriptor to clean is done, so that means all the
654 * descriptors from the last descriptor that was cleaned
655 * up to the last descriptor with the RS bit set
656 * are done. Only reset the threshold descriptor.
658 txr[desc_to_clean_to].wb.status = 0;
660 /* Update the txq to reflect the last descriptor that was cleaned */
661 txq->last_desc_cleaned = desc_to_clean_to;
662 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
669 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
672 struct igb_tx_queue *txq;
673 struct igb_tx_entry *sw_ring;
674 struct igb_tx_entry *txe, *txn;
675 volatile union ixgbe_adv_tx_desc *txr;
676 volatile union ixgbe_adv_tx_desc *txd;
677 struct rte_mbuf *tx_pkt;
678 struct rte_mbuf *m_seg;
679 uint64_t buf_dma_addr;
680 uint32_t olinfo_status;
681 uint32_t cmd_type_len;
690 uint32_t vlan_macip_lens;
695 sw_ring = txq->sw_ring;
697 tx_id = txq->tx_tail;
698 txe = &sw_ring[tx_id];
700 /* Determine if the descriptor ring needs to be cleaned. */
701 if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
702 ixgbe_xmit_cleanup(txq);
706 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
709 pkt_len = tx_pkt->pkt.pkt_len;
711 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
714 * Determine how many (if any) context descriptors
715 * are needed for offload functionality.
717 ol_flags = tx_pkt->ol_flags;
718 vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
720 /* If hardware offload required */
721 tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
			/* Build a new context descriptor if needed, or reuse an existing one. */
			ctx = what_advctx_update(txq, tx_ol_req,
				vlan_macip_lens);
			/* Only allocate a context descriptor if required. */
			new_ctx = (ctx == IXGBE_CTX_NUM);
732 * Keep track of how many descriptors are used this loop
733 * This will always be the number of segments + the number of
734 * Context descriptors required to transmit the packet
736 nb_used = (uint16_t)(tx_pkt->pkt.nb_segs + new_ctx);
739 * The number of descriptors that must be allocated for a
740 * packet is the number of segments of that packet, plus 1
741 * Context Descriptor for the hardware offload, if any.
742 * Determine the last TX descriptor to allocate in the TX ring
743 * for the packet, starting from the current position (tx_id)
746 tx_last = (uint16_t) (tx_id + nb_used - 1);
749 if (tx_last >= txq->nb_tx_desc)
750 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
752 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
753 " tx_first=%u tx_last=%u\n",
754 (unsigned) txq->port_id,
755 (unsigned) txq->queue_id,
761 * Make sure there are enough TX descriptors available to
762 * transmit the entire packet.
763 * nb_used better be less than or equal to txq->tx_rs_thresh
765 if (nb_used > txq->nb_tx_free) {
766 PMD_TX_FREE_LOG(DEBUG,
767 "Not enough free TX descriptors "
768 "nb_used=%4u nb_free=%4u "
769 "(port=%d queue=%d)",
770 nb_used, txq->nb_tx_free,
771 txq->port_id, txq->queue_id);
773 if (ixgbe_xmit_cleanup(txq) != 0) {
774 /* Could not clean any descriptors */
780 /* nb_used better be <= txq->tx_rs_thresh */
781 if (unlikely(nb_used > txq->tx_rs_thresh)) {
782 PMD_TX_FREE_LOG(DEBUG,
783 "The number of descriptors needed to "
784 "transmit the packet exceeds the "
785 "RS bit threshold. This will impact "
787 "nb_used=%4u nb_free=%4u "
789 "(port=%d queue=%d)",
790 nb_used, txq->nb_tx_free,
792 txq->port_id, txq->queue_id);
794 * Loop here until there are enough TX
795 * descriptors or until the ring cannot be
798 while (nb_used > txq->nb_tx_free) {
799 if (ixgbe_xmit_cleanup(txq) != 0) {
801 * Could not clean any
813 * By now there are enough free TX descriptors to transmit
818 * Set common flags of all TX Data Descriptors.
820 * The following bits must be set in all Data Descriptors:
821 * - IXGBE_ADVTXD_DTYP_DATA
822 * - IXGBE_ADVTXD_DCMD_DEXT
824 * The following bits must be set in the first Data Descriptor
825 * and are ignored in the other ones:
826 * - IXGBE_ADVTXD_DCMD_IFCS
827 * - IXGBE_ADVTXD_MAC_1588
828 * - IXGBE_ADVTXD_DCMD_VLE
830 * The following bits must only be set in the last Data
832 * - IXGBE_TXD_CMD_EOP
834 * The following bits can be set in any Data Descriptor, but
835 * are only set in the last Data Descriptor:
838 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
839 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
840 olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
841 #ifdef RTE_LIBRTE_IEEE1588
842 if (ol_flags & PKT_TX_IEEE1588_TMST)
843 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
848 * Setup the TX Advanced Context Descriptor if required
851 volatile struct ixgbe_adv_tx_context_desc *
854 ctx_txd = (volatile struct
855 ixgbe_adv_tx_context_desc *)
858 txn = &sw_ring[txe->next_id];
859 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
861 if (txe->mbuf != NULL) {
862 rte_pktmbuf_free_seg(txe->mbuf);
866 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
869 txe->last_id = tx_last;
870 tx_id = txe->next_id;
			/*
			 * Set up the TX Advanced Data Descriptor;
			 * this path is taken whether the context descriptor
			 * is newly built or reused.
			 */
879 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
880 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
881 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
887 txn = &sw_ring[txe->next_id];
889 if (txe->mbuf != NULL)
890 rte_pktmbuf_free_seg(txe->mbuf);
894 * Set up Transmit Data Descriptor.
896 slen = m_seg->pkt.data_len;
897 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
898 txd->read.buffer_addr =
899 rte_cpu_to_le_64(buf_dma_addr);
900 txd->read.cmd_type_len =
901 rte_cpu_to_le_32(cmd_type_len | slen);
902 txd->read.olinfo_status =
903 rte_cpu_to_le_32(olinfo_status);
904 txe->last_id = tx_last;
905 tx_id = txe->next_id;
907 m_seg = m_seg->pkt.next;
908 } while (m_seg != NULL);
911 * The last packet data descriptor needs End Of Packet (EOP)
913 cmd_type_len |= IXGBE_TXD_CMD_EOP;
914 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
915 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917 /* Set RS bit only on threshold packets' last descriptor */
918 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
919 PMD_TX_FREE_LOG(DEBUG,
920 "Setting RS bit on TXD id="
921 "%4u (port=%d queue=%d)",
922 tx_last, txq->port_id, txq->queue_id);
924 cmd_type_len |= IXGBE_TXD_CMD_RS;
926 /* Update txq RS bit counters */
929 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
935 * Set the Transmit Descriptor Tail (TDT)
937 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
938 (unsigned) txq->port_id, (unsigned) txq->queue_id,
939 (unsigned) tx_id, (unsigned) nb_tx);
940 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
941 txq->tx_tail = tx_id;
946 /*********************************************************************
950 **********************************************************************/
951 static inline uint16_t
952 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
956 static uint16_t ip_pkt_types_map[16] = {
957 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
958 PKT_RX_IPV6_HDR, 0, 0, 0,
959 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
960 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
963 static uint16_t ip_rss_types_map[16] = {
964 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
965 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
966 PKT_RX_RSS_HASH, 0, 0, 0,
967 0, 0, 0, PKT_RX_FDIR,
970 #ifdef RTE_LIBRTE_IEEE1588
971 static uint32_t ip_pkt_etqf_map[8] = {
972 0, 0, 0, PKT_RX_IEEE1588_PTP,
976 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
977 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
978 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
980 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
981 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
984 return (uint16_t)(pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
987 static inline uint16_t
988 rx_desc_status_to_pkt_flags(uint32_t rx_status)
	/*
	 * Check only whether a VLAN tag is present. Whether the NIC has
	 * validated the L3/L4 RX checksum is not checked here; that is
	 * controlled by the rte_eth_rxmode.hw_ip_checksum flag.
	 */
	pkt_flags = (uint16_t)((rx_status & IXGBE_RXD_STAT_VP) ?
						PKT_RX_VLAN_PKT : 0);
1000 #ifdef RTE_LIBRTE_IEEE1588
1001 if (rx_status & IXGBE_RXD_STAT_TMST)
1002 pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
1007 static inline uint16_t
1008 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1011 * Bit 31: IPE, IPv4 checksum error
1012 * Bit 30: L4I, L4I integrity error
1014 static uint16_t error_to_pkt_flags_map[4] = {
1015 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1016 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1018 return error_to_pkt_flags_map[(rx_status >>
1019 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1022 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
1024 * LOOK_AHEAD defines how many desc statuses to check beyond the
1025 * current descriptor.
1026 * It must be a pound define for optimal performance.
1027 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1028 * function only works with LOOK_AHEAD=8.
1030 #define LOOK_AHEAD 8
1031 #if (LOOK_AHEAD != 8)
1032 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1035 ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
1037 volatile union ixgbe_adv_rx_desc *rxdp;
1038 struct igb_rx_entry *rxep;
1039 struct rte_mbuf *mb;
1041 int s[LOOK_AHEAD], nb_dd;
1042 int i, j, nb_rx = 0;
1045 /* get references to current descriptor and S/W ring entry */
1046 rxdp = &rxq->rx_ring[rxq->rx_tail];
1047 rxep = &rxq->sw_ring[rxq->rx_tail];
1049 /* check to make sure there is at least 1 packet to receive */
1050 if (! (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
1054 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1055 * reference packets that are ready to be received.
1057 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1058 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1060 /* Read desc statuses backwards to avoid race condition */
1061 for (j = LOOK_AHEAD-1; j >= 0; --j)
1062 s[j] = rxdp[j].wb.upper.status_error;
1064 /* Clear everything but the status bits (LSB) */
1065 for (j = 0; j < LOOK_AHEAD; ++j)
1066 s[j] &= IXGBE_RXDADV_STAT_DD;
1068 /* Compute how many status bits were set */
1069 nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
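		/*
		 * Summing works because IXGBE_RXDADV_STAT_DD is bit 0, so
		 * each masked status word contributes either 0 or 1 to nb_dd.
		 */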
1072 /* Translate descriptor info to mbuf format */
1073 for (j = 0; j < nb_dd; ++j) {
1075 pkt_len = (uint16_t)(rxdp[j].wb.upper.length -
1077 mb->pkt.data_len = pkt_len;
1078 mb->pkt.pkt_len = pkt_len;
1079 mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan;
1080 mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss;
1082 /* convert descriptor fields to rte mbuf flags */
1083 mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags(
1084 rxdp[j].wb.lower.lo_dword.data);
1085 /* reuse status field from scan list */
1086 mb->ol_flags = (uint16_t)(mb->ol_flags |
1087 rx_desc_status_to_pkt_flags(s[j]));
1088 mb->ol_flags = (uint16_t)(mb->ol_flags |
1089 rx_desc_error_to_pkt_flags(s[j]));
1092 /* Move mbuf pointers from the S/W ring to the stage */
1093 for (j = 0; j < LOOK_AHEAD; ++j) {
1094 rxq->rx_stage[i + j] = rxep[j].mbuf;
1097 /* stop if all requested packets could not be received */
1098 if (nb_dd != LOOK_AHEAD)
1102 /* clear software ring entries so we can cleanup correctly */
1103 for (i = 0; i < nb_rx; ++i) {
1104 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1112 ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
1114 volatile union ixgbe_adv_rx_desc *rxdp;
1115 struct igb_rx_entry *rxep;
1116 struct rte_mbuf *mb;
1121 /* allocate buffers in bulk directly into the S/W ring */
1122 alloc_idx = (uint16_t)(rxq->rx_free_trigger -
1123 (rxq->rx_free_thresh - 1));
1124 rxep = &rxq->sw_ring[alloc_idx];
1125 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1126 rxq->rx_free_thresh);
1127 if (unlikely(diag != 0))
1130 rxdp = &rxq->rx_ring[alloc_idx];
1131 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1132 /* populate the static rte mbuf fields */
1134 rte_mbuf_refcnt_set(mb, 1);
1135 mb->type = RTE_MBUF_PKT;
1136 mb->pkt.next = NULL;
1137 mb->pkt.data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
1138 mb->pkt.nb_segs = 1;
1139 mb->pkt.in_port = rxq->port_id;
1141 /* populate the descriptors */
1142 dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
1143 rxdp[i].read.hdr_addr = dma_addr;
1144 rxdp[i].read.pkt_addr = dma_addr;
1147 /* update tail pointer */
1149 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);
1151 /* update state of internal queue structure */
1152 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
1153 rxq->rx_free_thresh);
1154 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1155 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
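	/*
	 * Example (illustrative): with rx_free_thresh = 32 the trigger
	 * advances 31 -> 63 -> 95 -> ... and wraps back to 31 once it
	 * reaches the end of the ring.
	 */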
1161 static inline uint16_t
1162 ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1165 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1168 /* how many packets are ready to return? */
1169 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1171 /* copy mbuf pointers to the application's packet list */
1172 for (i = 0; i < nb_pkts; ++i)
1173 rx_pkts[i] = stage[i];
1175 /* update internal queue state */
1176 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1177 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1182 static inline uint16_t
1183 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1186 struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
1189 /* Any previously recv'd pkts will be returned from the Rx stage */
1190 if (rxq->rx_nb_avail)
1191 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1193 /* Scan the H/W ring for packets to receive */
1194 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1196 /* update internal queue state */
1197 rxq->rx_next_avail = 0;
1198 rxq->rx_nb_avail = nb_rx;
1199 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1201 /* if required, allocate new buffers to replenish descriptors */
1202 if (rxq->rx_tail > rxq->rx_free_trigger) {
1203 if (ixgbe_rx_alloc_bufs(rxq) != 0) {
1205 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1206 "queue_id=%u\n", (unsigned) rxq->port_id,
1207 (unsigned) rxq->queue_id);
1209 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1210 rxq->rx_free_thresh;
1213 * Need to rewind any previous receives if we cannot
1214 * allocate new buffers to replenish the old ones.
1216 rxq->rx_nb_avail = 0;
1217 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1218 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1219 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1225 if (rxq->rx_tail >= rxq->nb_rx_desc)
1228 /* received any packets this loop? */
1229 if (rxq->rx_nb_avail)
1230 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1235 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1237 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1242 if (unlikely(nb_pkts == 0))
1245 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1246 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1248 /* request is relatively large, chunk it up */
1252 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1253 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1254 nb_rx = (uint16_t)(nb_rx + ret);
1255 nb_pkts = (uint16_t)(nb_pkts - ret);
1262 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
1265 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1268 struct igb_rx_queue *rxq;
1269 volatile union ixgbe_adv_rx_desc *rx_ring;
1270 volatile union ixgbe_adv_rx_desc *rxdp;
1271 struct igb_rx_entry *sw_ring;
1272 struct igb_rx_entry *rxe;
1273 struct rte_mbuf *rxm;
1274 struct rte_mbuf *nmb;
1275 union ixgbe_adv_rx_desc rxd;
1278 uint32_t hlen_type_rss;
1288 rx_id = rxq->rx_tail;
1289 rx_ring = rxq->rx_ring;
1290 sw_ring = rxq->sw_ring;
1291 while (nb_rx < nb_pkts) {
1293 * The order of operations here is important as the DD status
1294 * bit must not be read after any other descriptor fields.
1295 * rx_ring and rxdp are pointing to volatile data so the order
1296 * of accesses cannot be reordered by the compiler. If they were
1297 * not volatile, they could be reordered which could lead to
1298 * using invalid descriptor fields when read from rxd.
1300 rxdp = &rx_ring[rx_id];
1301 staterr = rxdp->wb.upper.status_error;
1302 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1309 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1310 * is likely to be invalid and to be dropped by the various
1311 * validation checks performed by the network stack.
1313 * Allocate a new mbuf to replenish the RX ring descriptor.
1314 * If the allocation fails:
1315 * - arrange for that RX descriptor to be the first one
1316 * being parsed the next time the receive function is
1317 * invoked [on the same queue].
1319 * - Stop parsing the RX ring and return immediately.
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
1325 * As a side effect, holding RX descriptors instead of
1326 * systematically giving them back to the NIC may lead to
1327 * RX ring exhaustion situations.
1328 * However, the NIC can gracefully prevent such situations
1329 * to happen by sending specific "back-pressure" flow control
1330 * frames to its peer(s).
1332 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1333 "ext_err_stat=0x%08x pkt_len=%u\n",
1334 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1335 (unsigned) rx_id, (unsigned) staterr,
1336 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1338 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1340 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1341 "queue_id=%u\n", (unsigned) rxq->port_id,
1342 (unsigned) rxq->queue_id);
1343 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1348 rxe = &sw_ring[rx_id];
1350 if (rx_id == rxq->nb_rx_desc)
1353 /* Prefetch next mbuf while processing current one. */
1354 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1357 * When next RX descriptor is on a cache-line boundary,
1358 * prefetch the next 4 RX descriptors and the next 8 pointers
1361 if ((rx_id & 0x3) == 0) {
1362 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1363 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1369 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1370 rxdp->read.hdr_addr = dma_addr;
1371 rxdp->read.pkt_addr = dma_addr;
1374 * Initialize the returned mbuf.
1375 * 1) setup generic mbuf fields:
1376 * - number of segments,
1379 * - RX port identifier.
1380 * 2) integrate hardware offload data, if any:
1381 * - RSS flag & hash,
1382 * - IP checksum flag,
1383 * - VLAN TCI, if any,
1386 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1388 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1389 rte_packet_prefetch(rxm->pkt.data);
1390 rxm->pkt.nb_segs = 1;
1391 rxm->pkt.next = NULL;
1392 rxm->pkt.pkt_len = pkt_len;
1393 rxm->pkt.data_len = pkt_len;
1394 rxm->pkt.in_port = rxq->port_id;
1396 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1397 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1398 rxm->pkt.vlan_macip.f.vlan_tci =
1399 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1401 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1402 pkt_flags = (uint16_t)(pkt_flags |
1403 rx_desc_status_to_pkt_flags(staterr));
1404 pkt_flags = (uint16_t)(pkt_flags |
1405 rx_desc_error_to_pkt_flags(staterr));
1406 rxm->ol_flags = pkt_flags;
1408 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1409 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1410 else if (pkt_flags & PKT_RX_FDIR) {
1411 rxm->pkt.hash.fdir.hash =
1412 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1413 & IXGBE_ATR_HASH_MASK);
1414 rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
1417 * Store the mbuf address into the next entry of the array
1418 * of returned packets.
1420 rx_pkts[nb_rx++] = rxm;
1422 rxq->rx_tail = rx_id;
1425 * If the number of free RX descriptors is greater than the RX free
1426 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1428 * Update the RDT with the value of the last processed RX descriptor
1429 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
1431 * hardware point of view...
1433 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1434 if (nb_hold > rxq->rx_free_thresh) {
1435 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1436 "nb_hold=%u nb_rx=%u\n",
1437 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1438 (unsigned) rx_id, (unsigned) nb_hold,
1440 rx_id = (uint16_t) ((rx_id == 0) ?
1441 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1442 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1445 rxq->nb_rx_hold = nb_hold;
1450 ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1453 struct igb_rx_queue *rxq;
1454 volatile union ixgbe_adv_rx_desc *rx_ring;
1455 volatile union ixgbe_adv_rx_desc *rxdp;
1456 struct igb_rx_entry *sw_ring;
1457 struct igb_rx_entry *rxe;
1458 struct rte_mbuf *first_seg;
1459 struct rte_mbuf *last_seg;
1460 struct rte_mbuf *rxm;
1461 struct rte_mbuf *nmb;
1462 union ixgbe_adv_rx_desc rxd;
1463 uint64_t dma; /* Physical address of mbuf data buffer */
1465 uint32_t hlen_type_rss;
1475 rx_id = rxq->rx_tail;
1476 rx_ring = rxq->rx_ring;
1477 sw_ring = rxq->sw_ring;
1480 * Retrieve RX context of current packet, if any.
1482 first_seg = rxq->pkt_first_seg;
1483 last_seg = rxq->pkt_last_seg;
1485 while (nb_rx < nb_pkts) {
1488 * The order of operations here is important as the DD status
1489 * bit must not be read after any other descriptor fields.
1490 * rx_ring and rxdp are pointing to volatile data so the order
1491 * of accesses cannot be reordered by the compiler. If they were
1492 * not volatile, they could be reordered which could lead to
1493 * using invalid descriptor fields when read from rxd.
1495 rxdp = &rx_ring[rx_id];
1496 staterr = rxdp->wb.upper.status_error;
1497 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1504 * Allocate a new mbuf to replenish the RX ring descriptor.
1505 * If the allocation fails:
1506 * - arrange for that RX descriptor to be the first one
1507 * being parsed the next time the receive function is
1508 * invoked [on the same queue].
1510 * - Stop parsing the RX ring and return immediately.
1512 * This policy does not drop the packet received in the RX
1513 * descriptor for which the allocation of a new mbuf failed.
1514 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
1516 * As a side effect, holding RX descriptors instead of
1517 * systematically giving them back to the NIC may lead to
1518 * RX ring exhaustion situations.
1519 * However, the NIC can gracefully prevent such situations
1520 * to happen by sending specific "back-pressure" flow control
1521 * frames to its peer(s).
1523 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
1524 "staterr=0x%x data_len=%u\n",
1525 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1526 (unsigned) rx_id, (unsigned) staterr,
1527 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1529 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1531 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1532 "queue_id=%u\n", (unsigned) rxq->port_id,
1533 (unsigned) rxq->queue_id);
1534 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1539 rxe = &sw_ring[rx_id];
1541 if (rx_id == rxq->nb_rx_desc)
1544 /* Prefetch next mbuf while processing current one. */
1545 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1548 * When next RX descriptor is on a cache-line boundary,
1549 * prefetch the next 4 RX descriptors and the next 8 pointers
1552 if ((rx_id & 0x3) == 0) {
1553 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1554 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1558 * Update RX descriptor with the physical address of the new
1559 * data buffer of the new allocated mbuf.
1563 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1564 rxdp->read.hdr_addr = dma;
1565 rxdp->read.pkt_addr = dma;
1568 * Set data length & data buffer address of mbuf.
1570 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1571 rxm->pkt.data_len = data_len;
1572 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1575 * If this is the first buffer of the received packet,
1576 * set the pointer to the first mbuf of the packet and
1577 * initialize its context.
1578 * Otherwise, update the total length and the number of segments
1579 * of the current scattered packet, and update the pointer to
1580 * the last mbuf of the current packet.
1582 if (first_seg == NULL) {
1584 first_seg->pkt.pkt_len = data_len;
1585 first_seg->pkt.nb_segs = 1;
1587 first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
1589 first_seg->pkt.nb_segs++;
1590 last_seg->pkt.next = rxm;
1594 * If this is not the last buffer of the received packet,
1595 * update the pointer to the last mbuf of the current scattered
1596 * packet and continue to parse the RX ring.
1598 if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
1604 * This is the last buffer of the received packet.
1605 * If the CRC is not stripped by the hardware:
1606 * - Subtract the CRC length from the total packet length.
1607 * - If the last buffer only contains the whole CRC or a part
1608 * of it, free the mbuf associated to the last buffer.
1609 * If part of the CRC is also contained in the previous
1610 * mbuf, subtract the length of that CRC part from the
1611 * data length of the previous mbuf.
1613 rxm->pkt.next = NULL;
1614 if (unlikely(rxq->crc_len > 0)) {
1615 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
1616 if (data_len <= ETHER_CRC_LEN) {
1617 rte_pktmbuf_free_seg(rxm);
1618 first_seg->pkt.nb_segs--;
1619 last_seg->pkt.data_len = (uint16_t)
1620 (last_seg->pkt.data_len -
1621 (ETHER_CRC_LEN - data_len));
1622 last_seg->pkt.next = NULL;
1625 (uint16_t) (data_len - ETHER_CRC_LEN);
1629 * Initialize the first mbuf of the returned packet:
1630 * - RX port identifier,
1631 * - hardware offload data, if any:
1632 * - RSS flag & hash,
1633 * - IP checksum flag,
1634 * - VLAN TCI, if any,
1637 first_seg->pkt.in_port = rxq->port_id;
1640 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1641 * set in the pkt_flags field.
1643 first_seg->pkt.vlan_macip.f.vlan_tci =
1644 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1645 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1646 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1647 pkt_flags = (uint16_t)(pkt_flags |
1648 rx_desc_status_to_pkt_flags(staterr));
1649 pkt_flags = (uint16_t)(pkt_flags |
1650 rx_desc_error_to_pkt_flags(staterr));
1651 first_seg->ol_flags = pkt_flags;
1653 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1654 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1655 else if (pkt_flags & PKT_RX_FDIR) {
1656 first_seg->pkt.hash.fdir.hash =
1657 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1658 & IXGBE_ATR_HASH_MASK);
1659 first_seg->pkt.hash.fdir.id =
1660 rxd.wb.lower.hi_dword.csum_ip.ip_id;
1663 /* Prefetch data of first segment, if configured to do so. */
1664 rte_packet_prefetch(first_seg->pkt.data);
1667 * Store the mbuf address into the next entry of the array
1668 * of returned packets.
1670 rx_pkts[nb_rx++] = first_seg;
1673 * Setup receipt context for a new packet.
1679 * Record index of the next RX descriptor to probe.
1681 rxq->rx_tail = rx_id;
1684 * Save receive context.
1686 rxq->pkt_first_seg = first_seg;
1687 rxq->pkt_last_seg = last_seg;
1690 * If the number of free RX descriptors is greater than the RX free
1691 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1693 * Update the RDT with the value of the last processed RX descriptor
1694 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
1696 * hardware point of view...
1698 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1699 if (nb_hold > rxq->rx_free_thresh) {
1700 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1701 "nb_hold=%u nb_rx=%u\n",
1702 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1703 (unsigned) rx_id, (unsigned) nb_hold,
1705 rx_id = (uint16_t) ((rx_id == 0) ?
1706 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1707 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1710 rxq->nb_rx_hold = nb_hold;
1714 /*********************************************************************
1716 * Queue management functions
1718 **********************************************************************/
1721 * Rings setup and release.
1723 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
1724 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
1725 * also optimize cache line size effect. H/W supports up to cache line size 128.
1727 #define IXGBE_ALIGN 128
1730 * Maximum number of Ring Descriptors.
1732 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1733 * descriptors should meet the following condition:
1734 * (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1736 #define IXGBE_MIN_RING_DESC 64
1737 #define IXGBE_MAX_RING_DESC 4096
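/*
 * Each advanced RX/TX descriptor is 16 bytes, so the 128-byte RDLEN/TDLEN
 * constraint above means the ring size must be a multiple of 8 descriptors,
 * bounded by IXGBE_MIN_RING_DESC and IXGBE_MAX_RING_DESC.
 */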
1740 * Create memzone for HW rings. malloc can't be used as the physical address is
1741 * needed. If the memzone is already created, then this function returns a ptr
1744 static const struct rte_memzone *
1745 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1746 uint16_t queue_id, uint32_t ring_size, int socket_id)
1748 char z_name[RTE_MEMZONE_NAMESIZE];
1749 const struct rte_memzone *mz;
1751 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1752 dev->driver->pci_drv.name, ring_name,
1753 dev->data->port_id, queue_id);
1755 mz = rte_memzone_lookup(z_name);
1759 return rte_memzone_reserve_aligned(z_name, ring_size,
1760 socket_id, 0, IXGBE_ALIGN);
1764 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1768 if (txq->sw_ring != NULL) {
1769 for (i = 0; i < txq->nb_tx_desc; i++) {
1770 if (txq->sw_ring[i].mbuf != NULL) {
1771 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1772 txq->sw_ring[i].mbuf = NULL;
1779 ixgbe_tx_queue_release(struct igb_tx_queue *txq)
1782 ixgbe_tx_queue_release_mbufs(txq);
1783 rte_free(txq->sw_ring);
1789 ixgbe_dev_tx_queue_release(void *txq)
1791 ixgbe_tx_queue_release(txq);
1794 /* (Re)set dynamic igb_tx_queue fields to defaults */
1796 ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
1798 struct igb_tx_entry *txe = txq->sw_ring;
1801 /* Zero out HW ring memory */
1802 for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
1803 ((volatile char *)txq->tx_ring)[i] = 0;
1806 /* Initialize SW ring entries */
1807 prev = (uint16_t) (txq->nb_tx_desc - 1);
1808 for (i = 0; i < txq->nb_tx_desc; i++) {
1809 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1810 txd->wb.status = IXGBE_TXD_STAT_DD;
1813 txe[prev].next_id = i;
1817 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1818 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1821 txq->nb_tx_used = 0;
1823 * Always allow 1 descriptor to be un-allocated to avoid
1824 * a H/W race condition
1826 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1827 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1829 memset((void*)&txq->ctx_cache, 0,
1830 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1834 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1837 unsigned int socket_id,
1838 const struct rte_eth_txconf *tx_conf)
1840 const struct rte_memzone *tz;
1841 struct igb_tx_queue *txq;
1842 struct ixgbe_hw *hw;
1843 uint16_t tx_rs_thresh, tx_free_thresh;
1845 PMD_INIT_FUNC_TRACE();
1846 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1849 * Validate number of transmit descriptors.
1850 * It must not exceed hardware maximum, and must be multiple
1853 if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
1854 (nb_desc > IXGBE_MAX_RING_DESC) ||
1855 (nb_desc < IXGBE_MIN_RING_DESC)) {
1860 * The following two parameters control the setting of the RS bit on
1861 * transmit descriptors.
1862 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1863 * descriptors have been used.
1864 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1865 * descriptors are used or if the number of descriptors required
1866 * to transmit a packet is greater than the number of free TX
1868 * The following constraints must be satisfied:
1869 * tx_rs_thresh must be greater than 0.
1870 * tx_rs_thresh must be less than the size of the ring minus 2.
1871 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1872 * tx_rs_thresh must be a divisor of the ring size.
1873 * tx_free_thresh must be greater than 0.
1874 * tx_free_thresh must be less than the size of the ring minus 3.
1875 * One descriptor in the TX ring is used as a sentinel to avoid a
1876 * H/W race condition, hence the maximum threshold constraints.
1877 * When set to zero use default values.
1879 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
1880 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
1881 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
1882 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
1883 if (tx_rs_thresh >= (nb_desc - 2)) {
1884 RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than the number "
1885 "of TX descriptors minus 2. (tx_rs_thresh=%u port=%d "
1886 "queue=%d)\n", (unsigned int)tx_rs_thresh,
1887 (int)dev->data->port_id, (int)queue_idx);
1890 if (tx_free_thresh >= (nb_desc - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			     "number of TX descriptors minus 3. "
			     "(tx_free_thresh=%u port=%d queue=%d)\n",
			     (unsigned int)tx_free_thresh,
			     (int)dev->data->port_id, (int)queue_idx);
1898 if (tx_rs_thresh > tx_free_thresh) {
1899 RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than or equal to "
1900 "tx_free_thresh. (tx_free_thresh=%u tx_rs_thresh=%u "
1901 "port=%d queue=%d)\n", (unsigned int)tx_free_thresh,
1902 (unsigned int)tx_rs_thresh, (int)dev->data->port_id,
1906 if ((nb_desc % tx_rs_thresh) != 0) {
1907 RTE_LOG(ERR, PMD, "tx_rs_thresh must be a divisor of the "
1908 "number of TX descriptors. (tx_rs_thresh=%u port=%d "
1909 "queue=%d)\n", (unsigned int)tx_rs_thresh,
1910 (int)dev->data->port_id, (int)queue_idx);
	/*
	 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
	 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
	 * by the NIC and all descriptors are written back after the NIC
	 * accumulates WTHRESH descriptors.
	 */
1920 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
1921 RTE_LOG(ERR, PMD, "TX WTHRESH must be set to 0 if "
1922 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
1923 "port=%d queue=%d)\n", (unsigned int)tx_rs_thresh,
1924 (int)dev->data->port_id, (int)queue_idx);
1928 /* Free memory prior to re-allocation if needed... */
1929 if (dev->data->tx_queues[queue_idx] != NULL)
1930 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
1932 /* First allocate the tx queue data structure */
1933 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1939 * Allocate TX ring hardware descriptors. A memzone large enough to
1940 * handle the maximum ring size is allocated in order to allow for
1941 * resizing in later calls to the queue setup function.
1943 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1944 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
1947 ixgbe_tx_queue_release(txq);
1951 txq->nb_tx_desc = nb_desc;
1952 txq->tx_rs_thresh = tx_rs_thresh;
1953 txq->tx_free_thresh = tx_free_thresh;
1954 txq->pthresh = tx_conf->tx_thresh.pthresh;
1955 txq->hthresh = tx_conf->tx_thresh.hthresh;
1956 txq->wthresh = tx_conf->tx_thresh.wthresh;
1957 txq->queue_id = queue_idx;
1958 txq->port_id = dev->data->port_id;
1959 txq->txq_flags = tx_conf->txq_flags;
	/*
	 * Use VFTDT instead of TDT when the device is a virtual function.
	 */
1964 if (hw->mac.type == ixgbe_mac_82599_vf)
1965 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
1967 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));
1969 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1970 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
1972 /* Allocate software ring */
1973 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1974 sizeof(struct igb_tx_entry) * nb_desc,
1976 if (txq->sw_ring == NULL) {
1977 ixgbe_tx_queue_release(txq);
1980 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1981 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1983 ixgbe_reset_tx_queue(txq);
1985 dev->data->tx_queues[queue_idx] = txq;
1987 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1988 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
1989 (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST))
1990 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1992 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1998 ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
2002 if (rxq->sw_ring != NULL) {
2003 for (i = 0; i < rxq->nb_rx_desc; i++) {
2004 if (rxq->sw_ring[i].mbuf != NULL) {
2005 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2006 rxq->sw_ring[i].mbuf = NULL;
2009 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2010 if (rxq->rx_nb_avail) {
2011 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2012 struct rte_mbuf *mb;
2013 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2014 rte_pktmbuf_free_seg(mb);
2016 rxq->rx_nb_avail = 0;
2023 ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
2026 ixgbe_rx_queue_release_mbufs(rxq);
2027 rte_free(rxq->sw_ring);
2033 ixgbe_dev_rx_queue_release(void *rxq)
2035 ixgbe_rx_queue_release(rxq);
2039 * Check if Rx Burst Bulk Alloc function can be used.
2041 * 0: the preconditions are satisfied and the bulk allocation function can be used.
2043 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2044 * function must be used.
2047 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2048 check_rx_burst_bulk_alloc_preconditions(struct igb_rx_queue *rxq)
2050 check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
2056 * Make sure the following pre-conditions are satisfied:
2057 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2058 * rxq->rx_free_thresh < rxq->nb_rx_desc
2059 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2060 * rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
2061 * Scattered packets are not supported. This should be checked
2062 * outside of this function.
2064 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2065 if (! (rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST))
2067 else if (! (rxq->rx_free_thresh < rxq->nb_rx_desc))
2069 else if (! ((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0))
2071 else if (! (rxq->nb_rx_desc <
2072 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)))
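/*
 * Worked example (values assumed): with RTE_PMD_IXGBE_RX_MAX_BURST = 32,
 * a queue created with nb_rx_desc = 128 and rx_free_thresh = 32 satisfies
 * every precondition above (32 >= 32, 32 < 128, 128 % 32 == 0 and
 * 128 < IXGBE_MAX_RING_DESC - 32), so the bulk allocation receive path can
 * be selected for it.
 */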
2081 /* Reset dynamic igb_rx_queue fields back to defaults */
2083 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
2089 * By default, the Rx queue setup function allocates enough memory for
2090 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2091 * extra memory at the end of the descriptor ring to be zero'd out. A
2092 * pre-condition for using the Rx burst bulk alloc function is that the
2093 * number of descriptors is less than or equal to
2094 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2095 * constraints here to see if we need to zero out memory after the end
2096 * of the H/W descriptor ring.
2098 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2099 if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
2100 /* zero out extra memory */
2101 len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2104 /* do not zero out extra memory */
2105 len = rxq->nb_rx_desc;
2108 * Zero out HW ring memory. Zero out extra memory at the end of
2109 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2110 * reads extra memory as zeros.
2112 for (i = 0; i < len * sizeof(union ixgbe_adv_rx_desc); i++) {
2113 ((volatile char *)rxq->rx_ring)[i] = 0;
2116 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2118 * initialize extra software ring entries. Space for these extra
2119 * entries is always allocated
2121 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2122 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
2123 rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
2126 rxq->rx_nb_avail = 0;
2127 rxq->rx_next_avail = 0;
2128 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2129 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
2131 rxq->nb_rx_hold = 0;
2132 rxq->pkt_first_seg = NULL;
2133 rxq->pkt_last_seg = NULL;
2137 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2140 unsigned int socket_id,
2141 const struct rte_eth_rxconf *rx_conf,
2142 struct rte_mempool *mp)
2144 const struct rte_memzone *rz;
2145 struct igb_rx_queue *rxq;
2146 struct ixgbe_hw *hw;
2147 int use_def_burst_func = 1;
2150 PMD_INIT_FUNC_TRACE();
2151 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2154 * Validate number of receive descriptors.
2155 * It must not exceed the hardware maximum, and must be a multiple of IXGBE_ALIGN.
2158 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
2159 (nb_desc > IXGBE_MAX_RING_DESC) ||
2160 (nb_desc < IXGBE_MIN_RING_DESC)) {
2164 /* Free memory prior to re-allocation if needed... */
2165 if (dev->data->rx_queues[queue_idx] != NULL)
2166 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2168 /* First allocate the rx queue data structure */
2169 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
2174 rxq->nb_rx_desc = nb_desc;
2175 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2176 rxq->queue_id = queue_idx;
2177 rxq->port_id = dev->data->port_id;
2178 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2180 rxq->drop_en = rx_conf->rx_drop_en;
2183 * Allocate RX ring hardware descriptors. A memzone large enough to
2184 * handle the maximum ring size is allocated in order to allow for
2185 * resizing in later calls to the queue setup function.
2187 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2188 IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
2191 ixgbe_rx_queue_release(rxq);
2195 * Modified to setup VFRDT for Virtual Function
2197 if (hw->mac.type == ixgbe_mac_82599_vf)
2198 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2200 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));
2202 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2203 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2206 * Allocate software ring. Allow for space at the end of the
2207 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2208 * function does not access an invalid memory region.
2210 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2211 len = (uint16_t)(nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2215 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
2216 sizeof(struct igb_rx_entry) * len,
2218 if (rxq->sw_ring == NULL) {
2219 ixgbe_rx_queue_release(rxq);
2222 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
2223 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
2226 * Certain constraints must be met in order to use the bulk buffer
2227 * allocation Rx burst function.
2229 use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
2231 /* Check if pre-conditions are satisfied, and no Scattered Rx */
2232 if (!use_def_burst_func && !dev->data->scattered_rx) {
2233 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2234 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
2235 "satisfied. Rx Burst Bulk Alloc function will be "
2236 "used on port=%d, queue=%d.\n",
2237 rxq->port_id, rxq->queue_id);
2238 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
2241 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
2242 "are not satisfied, Scattered Rx is requested, "
2243 "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
2244 "enabled (port=%d, queue=%d).\n",
2245 rxq->port_id, rxq->queue_id);
2247 dev->data->rx_queues[queue_idx] = rxq;
2249 ixgbe_reset_rx_queue(rxq);
2255 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2259 PMD_INIT_FUNC_TRACE();
2261 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2262 struct igb_tx_queue *txq = dev->data->tx_queues[i];
2264 ixgbe_tx_queue_release_mbufs(txq);
2265 ixgbe_reset_tx_queue(txq);
2269 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2270 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
2272 ixgbe_rx_queue_release_mbufs(rxq);
2273 ixgbe_reset_rx_queue(rxq);
2278 /*********************************************************************
2280 * Device RX/TX init functions
2282 **********************************************************************/
2285 * Receive Side Scaling (RSS)
2286 * See section 7.1.2.8 in the following document:
2287 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2290 * The source and destination IP addresses of the IP header and the source
2291 * and destination ports of TCP/UDP headers, if any, of received packets are
2292 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2293 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2294 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2295 * RSS output index which is used as the RX queue index where to store the received packets.
2297 * The following output is supplied in the RX write-back descriptor:
2298 * - 32-bit result of the Microsoft RSS hash function,
2299 * - 4-bit RSS type field.
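/*
 * Illustrative sketch (hypothetical helper, not part of the driver): how
 * the seven LSBs of the 32-bit RSS hash select an RX queue through the
 * 128-entry redirection table described above.
 */
static inline uint8_t
ixgbe_example_rss_queue(uint32_t rss_hash, const uint8_t reta[128])
{
	/* 7 LSBs of the hash -> RETA index -> 3-bit RX queue index */
	return reta[rss_hash & 0x7F];
}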
2303 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2304 * Used as the default key.
2306 static uint8_t rss_intel_key[40] = {
2307 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2308 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2309 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2310 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2311 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2315 ixgbe_rss_disable(struct rte_eth_dev *dev)
2317 struct ixgbe_hw *hw;
2320 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2321 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2322 mrqc &= ~IXGBE_MRQC_RSSEN;
2323 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2327 ixgbe_rss_configure(struct rte_eth_dev *dev)
2329 struct ixgbe_hw *hw;
2338 PMD_INIT_FUNC_TRACE();
2339 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2341 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2342 if (rss_hf == 0) { /* Disable RSS */
2343 ixgbe_rss_disable(dev);
2346 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
2347 if (hash_key == NULL)
2348 hash_key = rss_intel_key; /* Default hash key */
2350 /* Fill in RSS hash key */
2351 for (i = 0; i < 10; i++) {
2352 rss_key = hash_key[(i * 4)];
2353 rss_key |= hash_key[(i * 4) + 1] << 8;
2354 rss_key |= hash_key[(i * 4) + 2] << 16;
2355 rss_key |= hash_key[(i * 4) + 3] << 24;
2356 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
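/*
 * Worked example: the key is written 32 bits at a time, least significant
 * byte first. With the default key above, the first register value is
 * 0x6D | (0x5A << 8) | (0x56 << 16) | (0xDA << 24) = 0xDA565A6D.
 */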
2359 /* Fill in redirection table */
2361 for (i = 0, j = 0; i < 128; i++, j++) {
2362 if (j == dev->data->nb_rx_queues) j = 0;
2363 reta = (reta << 8) | j;
2365 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
2368 /* Set configured hashing functions in MRQC register */
2369 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
2370 if (rss_hf & ETH_RSS_IPV4)
2371 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2372 if (rss_hf & ETH_RSS_IPV4_TCP)
2373 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2374 if (rss_hf & ETH_RSS_IPV6)
2375 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2376 if (rss_hf & ETH_RSS_IPV6_EX)
2377 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2378 if (rss_hf & ETH_RSS_IPV6_TCP)
2379 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2380 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2381 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2382 if (rss_hf & ETH_RSS_IPV4_UDP)
2383 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2384 if (rss_hf & ETH_RSS_IPV6_UDP)
2385 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2386 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2387 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2388 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2391 #define NUM_VFTA_REGISTERS 128
2392 #define NIC_RX_BUFFER_SIZE 0x200
2395 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2397 struct rte_eth_vmdq_dcb_conf *cfg;
2398 struct ixgbe_hw *hw;
2399 enum rte_eth_nb_pools num_pools;
2400 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2402 uint8_t nb_tcs; /* number of traffic classes */
2405 PMD_INIT_FUNC_TRACE();
2406 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2407 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2408 num_pools = cfg->nb_queue_pools;
2409 /* Check we have a valid number of pools */
2410 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2411 ixgbe_rss_disable(dev);
2414 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2415 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2419 * split rx buffer up into sections, each for 1 traffic class
2421 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2422 for (i = 0 ; i < nb_tcs; i++) {
2423 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2424 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2425 /* clear 10 bits. */
2426 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2427 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2429 /* zero alloc all unused TCs */
2430 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2431 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2432 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2433 /* clear 10 bits. */
2434 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
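/*
 * Worked example: NIC_RX_BUFFER_SIZE is 0x200 (512 KB of packet buffer),
 * so with 16 pools / 8 TCs each TC is given 0x40 (64 KB) and with
 * 32 pools / 4 TCs each TC is given 0x80 (128 KB); the unused TCs are left
 * with a zero-sized buffer by the loop above.
 */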
2437 /* MRQC: enable vmdq and dcb */
2438 mrqc = ((num_pools == ETH_16_POOLS) ? \
2439 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2440 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2442 /* PFVTCTL: turn on virtualisation and set the default pool */
2443 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2444 if (cfg->enable_default_pool) {
2445 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2447 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2449 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2451 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2453 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2455 * mapping is done with 3 bits per priority,
2456 * so shift by i*3 each time
2458 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2460 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2462 /* RTRPCS: DCB related */
2463 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2465 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2466 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2467 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2468 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2470 /* VFTA - enable all vlan filters */
2471 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2472 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2475 /* VFRE: pool enabling for receive - 16 or 32 */
2476 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2477 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2480 * MPSAR - allow pools to read specific mac addresses
2481 * In this case, all pools should be able to read from mac addr 0
2483 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2484 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2486 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2487 for (i = 0; i < cfg->nb_pool_maps; i++) {
2488 /* set vlan id in VF register and set the valid bit */
2489 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2490 (cfg->pool_map[i].vlan_id & 0xFFF)));
2492 * Put the allowed pools in VFB reg. As we only have 16 or 32
2493 * pools, we only need to use the first half of the register
2496 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2501 * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
2502 * @hw: pointer to hardware structure
2503 * @dcb_config: pointer to ixgbe_dcb_config structure
2506 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2507 struct ixgbe_dcb_config *dcb_config)
2512 PMD_INIT_FUNC_TRACE();
2513 if (hw->mac.type != ixgbe_mac_82598EB) {
2514 /* Disable the Tx desc arbiter so that MTQC can be changed */
2515 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2516 reg |= IXGBE_RTTDCS_ARBDIS;
2517 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2519 /* Enable DCB for Tx with 8 TCs */
2520 if (dcb_config->num_tcs.pg_tcs == 8) {
2521 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2524 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2526 if (dcb_config->vt_mode)
2527 reg |= IXGBE_MTQC_VT_ENA;
2528 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2530 /* Disable drop for all queues */
2531 for (q = 0; q < 128; q++)
2532 IXGBE_WRITE_REG(hw, IXGBE_QDE,
2533 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2535 /* Enable the Tx desc arbiter */
2536 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2537 reg &= ~IXGBE_RTTDCS_ARBDIS;
2538 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2540 /* Enable Security TX Buffer IFG for DCB */
2541 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2542 reg |= IXGBE_SECTX_DCB;
2543 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2549 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2550 * @dev: pointer to rte_eth_dev structure
2551 * @dcb_config: pointer to ixgbe_dcb_config structure
2554 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2555 struct ixgbe_dcb_config *dcb_config)
2557 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2558 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2559 struct ixgbe_hw *hw =
2560 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2562 PMD_INIT_FUNC_TRACE();
2563 if (hw->mac.type != ixgbe_mac_82598EB)
2564 /*PF VF Transmit Enable*/
2565 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
2566 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2568 /*Configure general DCB TX parameters*/
2569 ixgbe_dcb_tx_hw_config(hw,dcb_config);
2574 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2575 struct ixgbe_dcb_config *dcb_config)
2577 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
2578 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2579 struct ixgbe_dcb_tc_config *tc;
2582 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2583 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
2584 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2585 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2588 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2589 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2591 /* User Priority to Traffic Class mapping */
2592 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2593 j = vmdq_rx_conf->dcb_queue[i];
2594 tc = &dcb_config->tc_config[j];
2595 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2601 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
2602 struct ixgbe_dcb_config *dcb_config)
2604 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2605 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2606 struct ixgbe_dcb_tc_config *tc;
2609 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2610 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
2611 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2612 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2615 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2616 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2619 /* User Priority to Traffic Class mapping */
2620 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2621 j = vmdq_tx_conf->dcb_queue[i];
2622 tc = &dcb_config->tc_config[j];
2623 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2630 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
2631 struct ixgbe_dcb_config *dcb_config)
2633 struct rte_eth_dcb_rx_conf *rx_conf =
2634 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
2635 struct ixgbe_dcb_tc_config *tc;
2638 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
2639 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
2641 /* User Priority to Traffic Class mapping */
2642 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2643 j = rx_conf->dcb_queue[i];
2644 tc = &dcb_config->tc_config[j];
2645 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2651 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
2652 struct ixgbe_dcb_config *dcb_config)
2654 struct rte_eth_dcb_tx_conf *tx_conf =
2655 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
2656 struct ixgbe_dcb_tc_config *tc;
2659 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
2660 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
2662 /* User Priority to Traffic Class mapping */
2663 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2664 j = tx_conf->dcb_queue[i];
2665 tc = &dcb_config->tc_config[j];
2666 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2672 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
2673 * @hw: pointer to hardware structure
2674 * @dcb_config: pointer to ixgbe_dcb_config structure
2677 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
2678 struct ixgbe_dcb_config *dcb_config)
2684 PMD_INIT_FUNC_TRACE();
2686 * Disable the arbiter before changing parameters
2687 * (always enable recycle mode; WSP)
2689 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
2690 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2692 if (hw->mac.type != ixgbe_mac_82598EB) {
2693 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
2694 if (dcb_config->num_tcs.pg_tcs == 4) {
2695 if (dcb_config->vt_mode)
2696 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2697 IXGBE_MRQC_VMDQRT4TCEN;
2699 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2700 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2704 if (dcb_config->num_tcs.pg_tcs == 8) {
2705 if (dcb_config->vt_mode)
2706 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2707 IXGBE_MRQC_VMDQRT8TCEN;
2709 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2710 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2715 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
2718 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2719 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2720 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2721 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2723 /* VFTA - enable all vlan filters */
2724 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2725 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2729 * Configure Rx packet plane (recycle mode; WSP) and re-enable the arbiter.
2732 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
2733 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2739 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
2740 uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2742 switch (hw->mac.type) {
2743 case ixgbe_mac_82598EB:
2744 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
2746 case ixgbe_mac_82599EB:
2747 case ixgbe_mac_X540:
2748 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
2757 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
2758 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2760 switch (hw->mac.type) {
2761 case ixgbe_mac_82598EB:
2762 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
2763 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
2765 case ixgbe_mac_82599EB:
2766 case ixgbe_mac_X540:
2767 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
2768 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
2775 #define DCB_RX_CONFIG 1
2776 #define DCB_TX_CONFIG 1
2777 #define DCB_TX_PB 1024
2779 * ixgbe_dcb_hw_configure - Enable DCB and configure
2780 * general DCB in VT mode and non-VT mode parameters
2781 * @dev: pointer to rte_eth_dev structure
2782 * @dcb_config: pointer to ixgbe_dcb_config structure
2785 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
2786 struct ixgbe_dcb_config *dcb_config)
2789 uint8_t i,pfc_en,nb_tcs;
2791 uint8_t config_dcb_rx = 0;
2792 uint8_t config_dcb_tx = 0;
2793 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2794 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2795 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2796 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2797 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2798 struct ixgbe_dcb_tc_config *tc;
2799 uint32_t max_frame = dev->data->max_frame_size;
2800 struct ixgbe_hw *hw =
2801 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2803 switch(dev->data->dev_conf.rxmode.mq_mode){
2805 dcb_config->vt_mode = true;
2806 if (hw->mac.type != ixgbe_mac_82598EB) {
2807 config_dcb_rx = DCB_RX_CONFIG;
2809 * get DCB and VT RX configuration parameters
2812 ixgbe_vmdq_dcb_rx_config(dev,dcb_config);
2813 /*Configure general VMDQ and DCB RX parameters*/
2814 ixgbe_vmdq_dcb_configure(dev);
2818 dcb_config->vt_mode = false;
2819 config_dcb_rx = DCB_RX_CONFIG;
2820 /* Get DCB RX configuration parameters from rte_eth_conf */
2821 ixgbe_dcb_rx_config(dev,dcb_config);
2822 /*Configure general DCB RX parameters*/
2823 ixgbe_dcb_rx_hw_config(hw, dcb_config);
2826 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration\n");
2829 switch (dev->data->dev_conf.txmode.mq_mode) {
2830 case ETH_VMDQ_DCB_TX:
2831 dcb_config->vt_mode = true;
2832 config_dcb_tx = DCB_TX_CONFIG;
2833 /* get DCB and VT TX configuration parameters from rte_eth_conf */
2834 ixgbe_dcb_vt_tx_config(dev,dcb_config);
2835 /*Configure general VMDQ and DCB TX parameters*/
2836 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
2840 dcb_config->vt_mode = false;
2841 config_dcb_tx = DCB_TX_CONFIG;
2842 /*get DCB TX configuration parameters from rte_eth_conf*/
2843 ixgbe_dcb_tx_config(dev,dcb_config);
2844 /*Configure general DCB TX parameters*/
2845 ixgbe_dcb_tx_hw_config(hw, dcb_config);
2848 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration\n");
2852 nb_tcs = dcb_config->num_tcs.pfc_tcs;
2854 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
2855 if(nb_tcs == ETH_4_TCS) {
2856 /* Avoid un-configured priority mapping to TC0 */
2858 uint8_t mask = 0xFF;
2859 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
2860 mask = (uint8_t)(mask & (~ (1 << map[i])));
2861 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
2862 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
2866 /* Re-configure 4 TCs BW */
2867 for (i = 0; i < nb_tcs; i++) {
2868 tc = &dcb_config->tc_config[i];
2869 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
2870 (uint8_t)(100 / nb_tcs);
2871 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
2872 (uint8_t)(100 / nb_tcs);
2874 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
2875 tc = &dcb_config->tc_config[i];
2876 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
2877 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
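/*
 * Worked example: with nb_tcs = 4 each of the four configured traffic
 * classes is assigned 100 / 4 = 25 percent of the bandwidth group on both
 * the TX and RX paths, and the remaining traffic classes are set to 0.
 */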
2882 /* Set RX buffer size */
2883 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2884 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
2885 for (i = 0 ; i < nb_tcs; i++) {
2886 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2888 /* zero alloc all unused TCs */
2889 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2890 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
2894 /* Only support an equally distributed Tx packet buffer strategy. */
2895 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
2896 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
2897 for (i = 0; i < nb_tcs; i++) {
2898 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
2899 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
2901 /* Clear unused TCs, if any, to zero buffer size*/
2902 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2903 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
2904 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
2908 /*Calculates traffic class credits*/
2909 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
2910 IXGBE_DCB_TX_CONFIG);
2911 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
2912 IXGBE_DCB_RX_CONFIG);
2915 /* Unpack CEE standard containers */
2916 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
2917 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2918 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
2919 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
2920 /* Configure PG(ETS) RX */
2921 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
2925 /* Unpack CEE standard containers */
2926 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
2927 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2928 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
2929 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
2930 /* Configure PG(ETS) TX */
2931 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
2934 /*Configure queue statistics registers*/
2935 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
2937 /* Check if the PFC is supported */
2938 if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
2939 pbsize = (uint16_t) (NIC_RX_BUFFER_SIZE / nb_tcs);
2940 for (i = 0; i < nb_tcs; i++) {
2942 * If the TC count is 8, and the default high_water is 48,
2943 * the low_water is 16 as default.
2945 hw->fc.high_water[i] = (pbsize * 3 ) / 4;
2946 hw->fc.low_water[i] = pbsize / 4;
2947 /* Enable pfc for this TC */
2948 tc = &dcb_config->tc_config[i];
2949 tc->pfc = ixgbe_dcb_pfc_enabled;
2951 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
2952 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
2954 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
2961 * ixgbe_configure_dcb - Configure DCB Hardware
2962 * @dev: pointer to rte_eth_dev
2964 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
2966 struct ixgbe_dcb_config *dcb_cfg =
2967 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
2969 PMD_INIT_FUNC_TRACE();
2970 /** Configure DCB hardware **/
2971 if(((dev->data->dev_conf.rxmode.mq_mode != ETH_RSS) &&
2972 (dev->data->nb_rx_queues == ETH_DCB_NUM_QUEUES))||
2973 ((dev->data->dev_conf.txmode.mq_mode != ETH_DCB_NONE) &&
2974 (dev->data->nb_tx_queues == ETH_DCB_NUM_QUEUES))) {
2975 ixgbe_dcb_hw_configure(dev,dcb_cfg);
2981 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2983 struct igb_rx_entry *rxe = rxq->sw_ring;
2987 /* Initialize software ring entries */
2988 for (i = 0; i < rxq->nb_rx_desc; i++) {
2989 volatile union ixgbe_adv_rx_desc *rxd;
2990 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
2992 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
2993 (unsigned) rxq->queue_id);
2997 rte_mbuf_refcnt_set(mbuf, 1);
2998 mbuf->type = RTE_MBUF_PKT;
2999 mbuf->pkt.next = NULL;
3000 mbuf->pkt.data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
3001 mbuf->pkt.nb_segs = 1;
3002 mbuf->pkt.in_port = rxq->port_id;
3005 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3006 rxd = &rxq->rx_ring[i];
3007 rxd->read.hdr_addr = dma_addr;
3008 rxd->read.pkt_addr = dma_addr;
3016 * Initializes Receive Unit.
3019 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
3021 struct ixgbe_hw *hw;
3022 struct igb_rx_queue *rxq;
3023 struct rte_pktmbuf_pool_private *mbp_priv;
3036 PMD_INIT_FUNC_TRACE();
3037 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3040 * Make sure receives are disabled while setting
3041 * up the RX context (registers, descriptor rings, etc.).
3043 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3044 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
3046 /* Enable receipt of broadcasted frames */
3047 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
3048 fctrl |= IXGBE_FCTRL_BAM;
3049 fctrl |= IXGBE_FCTRL_DPF;
3050 fctrl |= IXGBE_FCTRL_PMCF;
3051 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
3054 * Configure CRC stripping, if any.
3056 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3057 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3058 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
3060 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
3063 * Configure jumbo frame support, if any.
3065 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
3066 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
3067 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
3068 maxfrs &= 0x0000FFFF;
3069 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
3070 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
3072 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
3074 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3076 /* Setup RX queues */
3077 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3078 rxq = dev->data->rx_queues[i];
3080 /* Allocate buffers for descriptor rings */
3081 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3086 * Reset crc_len in case it was changed after queue setup by a
3087 * call to configure.
3089 rxq->crc_len = (uint8_t)
3090 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
3093 /* Setup the Base and Length of the Rx Descriptor Rings */
3094 bus_addr = rxq->rx_ring_phys_addr;
3095 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
3096 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3097 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
3098 (uint32_t)(bus_addr >> 32));
3099 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
3100 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3101 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
3102 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
3104 /* Configure the SRRCTL register */
3105 #ifdef RTE_HEADER_SPLIT_ENABLE
3107 * Configure Header Split
3109 if (dev->data->dev_conf.rxmode.header_split) {
3110 if (hw->mac.type == ixgbe_mac_82599EB) {
3111 /* Must setup the PSRTYPE register */
3113 psrtype = IXGBE_PSRTYPE_TCPHDR |
3114 IXGBE_PSRTYPE_UDPHDR |
3115 IXGBE_PSRTYPE_IPV4HDR |
3116 IXGBE_PSRTYPE_IPV6HDR;
3117 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
3119 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3120 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3121 IXGBE_SRRCTL_BSIZEHDR_MASK);
3122 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3125 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3127 /* Set if packets are dropped when no descriptors available */
3129 srrctl |= IXGBE_SRRCTL_DROP_EN;
3132 * Configure the RX buffer size in the BSIZEPACKET field of
3133 * the SRRCTL register of the queue.
3134 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
3137 mbp_priv = (struct rte_pktmbuf_pool_private *)
3138 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3139 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3140 RTE_PKTMBUF_HEADROOM);
3141 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3142 IXGBE_SRRCTL_BSIZEPKT_MASK);
3143 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
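/*
 * Worked example (mempool sizing assumed): with a mbuf data room of
 * 2176 bytes and the default RTE_PKTMBUF_HEADROOM of 128, buf_size is
 * 2048; shifting right by IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) programs
 * BSIZEPACKET = 2, i.e. 2 KB receive buffers for this queue.
 */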
3145 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3146 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3147 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
3148 IXGBE_RX_BUF_THRESHOLD > buf_size){
3149 dev->data->scattered_rx = 1;
3150 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3155 * Configure RSS if device configured with multiple RX queues.
3157 if (hw->mac.type == ixgbe_mac_82599EB) {
3158 if (dev->data->nb_rx_queues > 1)
3159 switch (dev->data->dev_conf.rxmode.mq_mode) {
3161 ixgbe_rss_configure(dev);
3165 ixgbe_vmdq_dcb_configure(dev);
3168 default: ixgbe_rss_disable(dev);
3171 ixgbe_rss_disable(dev);
3175 * Setup the Checksum Register.
3176 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
3177 * Enable IP/L4 checksum computation by hardware if requested to do so.
3179 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
3180 rxcsum |= IXGBE_RXCSUM_PCSD;
3181 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
3182 rxcsum |= IXGBE_RXCSUM_IPPCSE;
3184 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
3186 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
3188 if (hw->mac.type == ixgbe_mac_82599EB) {
3189 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3190 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3191 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3193 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
3194 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3195 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3202 * Initializes Transmit Unit.
3205 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
3207 struct ixgbe_hw *hw;
3208 struct igb_tx_queue *txq;
3215 PMD_INIT_FUNC_TRACE();
3216 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3218 /* Enable TX CRC (checksum offload requirement) */
3219 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3220 hlreg0 |= IXGBE_HLREG0_TXCRCEN;
3221 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3223 /* Setup the Base and Length of the Tx Descriptor Rings */
3224 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3225 txq = dev->data->tx_queues[i];
3227 bus_addr = txq->tx_ring_phys_addr;
3228 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3229 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3230 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
3231 (uint32_t)(bus_addr >> 32));
3232 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3233 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3234 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3235 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3236 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3239 * Disable Tx Head Writeback RO bit, since this hoses
3240 * bookkeeping if things aren't delivered in order.
3242 switch (hw->mac.type) {
3243 case ixgbe_mac_82598EB:
3244 txctrl = IXGBE_READ_REG(hw,
3245 IXGBE_DCA_TXCTRL(i));
3246 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3247 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i),
3251 case ixgbe_mac_82599EB:
3252 case ixgbe_mac_X540:
3254 txctrl = IXGBE_READ_REG(hw,
3255 IXGBE_DCA_TXCTRL_82599(i));
3256 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3257 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
3263 if (hw->mac.type != ixgbe_mac_82598EB) {
3264 /* disable arbiter before setting MTQC */
3265 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3266 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3267 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3269 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3271 /* re-enable arbiter */
3272 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3273 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3278 * Start Transmit and Receive Units.
3281 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
3283 struct ixgbe_hw *hw;
3284 struct igb_tx_queue *txq;
3285 struct igb_rx_queue *rxq;
3293 PMD_INIT_FUNC_TRACE();
3294 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3296 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3297 txq = dev->data->tx_queues[i];
3298 /* Setup Transmit Threshold Registers */
3299 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3300 txdctl |= txq->pthresh & 0x7F;
3301 txdctl |= ((txq->hthresh & 0x7F) << 8);
3302 txdctl |= ((txq->wthresh & 0x7F) << 16);
3303 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
3306 if (hw->mac.type != ixgbe_mac_82598EB) {
3307 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3308 dmatxctl |= IXGBE_DMATXCTL_TE;
3309 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3312 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3313 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3314 txdctl |= IXGBE_TXDCTL_ENABLE;
3315 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
3317 /* Wait until TX Enable ready */
3318 if (hw->mac.type == ixgbe_mac_82599EB) {
3322 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3323 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3325 PMD_INIT_LOG(ERR, "Could not enable "
3326 "Tx Queue %d\n", i);
3329 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3330 rxq = dev->data->rx_queues[i];
3331 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3332 rxdctl |= IXGBE_RXDCTL_ENABLE;
3333 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
3335 /* Wait until RX Enable ready */
3339 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3340 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3342 PMD_INIT_LOG(ERR, "Could not enable "
3343 "Rx Queue %d\n", i);
3345 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
3348 /* Enable Receive engine */
3349 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3350 if (hw->mac.type == ixgbe_mac_82598EB)
3351 rxctrl |= IXGBE_RXCTRL_DMBYPS;
3352 rxctrl |= IXGBE_RXCTRL_RXEN;
3353 hw->mac.ops.enable_rx_dma(hw, rxctrl);
3358 * [VF] Initializes Receive Unit.
3361 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
3363 struct ixgbe_hw *hw;
3364 struct igb_rx_queue *rxq;
3365 struct rte_pktmbuf_pool_private *mbp_priv;
3372 PMD_INIT_FUNC_TRACE();
3373 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3375 /* Setup RX queues */
3376 dev->rx_pkt_burst = ixgbe_recv_pkts;
3377 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3378 rxq = dev->data->rx_queues[i];
3380 /* Allocate buffers for descriptor rings */
3381 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3385 /* Setup the Base and Length of the Rx Descriptor Rings */
3386 bus_addr = rxq->rx_ring_phys_addr;
3388 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
3389 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3390 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
3391 (uint32_t)(bus_addr >> 32));
3392 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
3393 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3394 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
3395 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
3398 /* Configure the SRRCTL register */
3399 #ifdef RTE_HEADER_SPLIT_ENABLE
3401 * Configure Header Split
3403 if (dev->data->dev_conf.rxmode.header_split) {
3405 /* Must setup the PSRTYPE register */
3407 psrtype = IXGBE_PSRTYPE_TCPHDR |
3408 IXGBE_PSRTYPE_UDPHDR |
3409 IXGBE_PSRTYPE_IPV4HDR |
3410 IXGBE_PSRTYPE_IPV6HDR;
3412 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
3414 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3415 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3416 IXGBE_SRRCTL_BSIZEHDR_MASK);
3417 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3420 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3422 /* Set if packets are dropped when no descriptors available */
3424 srrctl |= IXGBE_SRRCTL_DROP_EN;
3427 * Configure the RX buffer size in the BSIZEPACKET field of
3428 * the SRRCTL register of the queue.
3429 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
3432 mbp_priv = (struct rte_pktmbuf_pool_private *)
3433 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3434 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3435 RTE_PKTMBUF_HEADROOM);
3436 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3437 IXGBE_SRRCTL_BSIZEPKT_MASK);
3440 * VF modification to write virtual function SRRCTL register
3442 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
3444 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3445 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3446 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
3447 dev->data->scattered_rx = 1;
3448 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3456 * [VF] Initializes Transmit Unit.
3459 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
3461 struct ixgbe_hw *hw;
3462 struct igb_tx_queue *txq;
3467 PMD_INIT_FUNC_TRACE();
3468 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3470 /* Setup the Base and Length of the Tx Descriptor Rings */
3471 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3472 txq = dev->data->tx_queues[i];
3473 bus_addr = txq->tx_ring_phys_addr;
3474 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
3475 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3476 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
3477 (uint32_t)(bus_addr >> 32));
3478 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
3479 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3480 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3481 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
3482 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
3485 * Disable Tx Head Writeback RO bit, since this hoses
3486 * bookkeeping if things aren't delivered in order.
3488 txctrl = IXGBE_READ_REG(hw,
3489 IXGBE_VFDCA_TXCTRL(i));
3490 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3491 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
3497 * [VF] Start Transmit and Receive Units.
3500 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
3502 struct ixgbe_hw *hw;
3503 struct igb_tx_queue *txq;
3504 struct igb_rx_queue *rxq;
3510 PMD_INIT_FUNC_TRACE();
3511 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3513 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3514 txq = dev->data->tx_queues[i];
3515 /* Setup Transmit Threshold Registers */
3516 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3517 txdctl |= txq->pthresh & 0x7F;
3518 txdctl |= ((txq->hthresh & 0x7F) << 8);
3519 txdctl |= ((txq->wthresh & 0x7F) << 16);
3520 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3523 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3525 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3526 txdctl |= IXGBE_TXDCTL_ENABLE;
3527 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3530 /* Wait until TX Enable ready */
3533 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3534 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3536 PMD_INIT_LOG(ERR, "Could not enable "
3537 "Tx Queue %d\n", i);
3539 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3541 rxq = dev->data->rx_queues[i];
3543 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3544 rxdctl |= IXGBE_RXDCTL_ENABLE;
3545 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
3547 /* Wait until RX Enable ready */
3551 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3552 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3554 PMD_INIT_LOG(ERR, "Could not enable "
3555 "Rx Queue %d\n", i);
3557 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);