/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "ixgbe_logs.h"
#include "ixgbe/ixgbe_api.h"
#include "ixgbe/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "ixgbe/ixgbe_dcb.h"
#define RTE_PMD_IXGBE_TX_MAX_BURST 32

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#endif

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
	return m;
}

#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
	(char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
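/*
 * Illustrative sketch (not part of the original driver): the two macros
 * above compute the bus address the NIC DMA engine must use.
 * RTE_MBUF_DATA_DMA_ADDR() follows the mbuf's current data pointer, while
 * RTE_MBUF_DATA_DMA_ADDR_DEFAULT() assumes the data starts right after the
 * configured headroom. The helper name below is hypothetical.
 */
static inline uint64_t
ixgbe_example_rx_dma_addr(struct rte_mbuf *mb)
{
	/*
	 * For a freshly allocated RX mbuf, pkt.data is buf_addr +
	 * RTE_PKTMBUF_HEADROOM, so both macros yield the same address.
	 */
	return RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb);
}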
/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
	uint16_t next_id; /**< Index of next descriptor in ring. */
	uint16_t last_id; /**< Index of last scattered descriptor. */
};
/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
	struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
	uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
	volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
	volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
	struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
	uint16_t nb_rx_desc; /**< number of RX descriptors. */
	uint16_t rx_tail; /**< current value of RDT register. */
	uint16_t nb_rx_hold; /**< number of held free RX descriptors. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	uint16_t rx_nb_avail; /**< number of staged packets ready to return to the app. */
	uint16_t rx_next_avail; /**< index of the next staged packet to return to the app. */
	uint16_t rx_free_trigger; /**< triggers RX buffer allocation. */
#endif
	uint16_t rx_free_thresh; /**< max free RX descriptors to hold. */
	uint16_t queue_id; /**< RX queue index. */
	uint8_t port_id; /**< Device port identifier. */
	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
	uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	/** need to allocate a dummy mbuf for wraparound when scanning the HW ring */
	struct rte_mbuf fake_mbuf;
	/** hold packets to return to the application */
	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
#endif
};
/**
 * IXGBE CTX Constants
 */
enum ixgbe_advctx_num {
	IXGBE_CTX_0 = 0, /**< CTX0 */
	IXGBE_CTX_1 = 1, /**< CTX1 */
	IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
};

/**
 * Structure used to decide whether a new TX context descriptor needs to
 * be built.
 */
struct ixgbe_advctx_info {
	uint16_t flags; /**< ol_flags for context build. */
	uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
	union rte_vlan_macip vlan_macip_lens; /**< vlan, mac ip length. */
};
/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
	/** TX ring virtual address. */
	volatile union ixgbe_adv_tx_desc *tx_ring;
	uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
	struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
	volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
	uint16_t nb_tx_desc; /**< number of TX descriptors. */
	uint16_t tx_tail; /**< current value of TDT reg. */
	uint16_t tx_free_thresh; /**< minimum free descriptors before freeing mbufs. */
	/** Number of TX descriptors to use before RS bit is set. */
	uint16_t tx_rs_thresh;
	/** Number of TX descriptors used since RS bit was set. */
	uint16_t nb_tx_used;
	/** Index to last TX descriptor to have been cleaned. */
	uint16_t last_desc_cleaned;
	/** Total number of TX descriptors ready to be allocated. */
	uint16_t nb_tx_free;
	uint16_t tx_next_dd; /**< next descriptor to scan for DD bit. */
	uint16_t tx_next_rs; /**< next descriptor to set RS bit. */
	uint16_t queue_id; /**< TX queue index. */
	uint8_t port_id; /**< Device port identifier. */
	uint8_t pthresh; /**< Prefetch threshold register. */
	uint8_t hthresh; /**< Host threshold register. */
	uint8_t wthresh; /**< Write-back threshold reg. */
	uint32_t txq_flags; /**< Holds flags for this TXq. */
	uint32_t ctx_curr; /**< Hardware context states. */
	/** Hardware context history. */
	struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
};
#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while(0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while(0)
#endif
/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * The "simple" TX queue functions require that the following
 * flags are set when the TX queue is configured:
 *  - ETH_TXQ_FLAGS_NOMULTSEGS
 *  - ETH_TXQ_FLAGS_NOVLANOFFL
 *  - ETH_TXQ_FLAGS_NOXSUMSCTP
 *  - ETH_TXQ_FLAGS_NOXSUMUDP
 *  - ETH_TXQ_FLAGS_NOXSUMTCP
 * and that the RS bit threshold (tx_rs_thresh) is at least equal to
 * RTE_PMD_IXGBE_TX_MAX_BURST.
 */
#define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
			    ETH_TXQ_FLAGS_NOOFFLOADS)
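
/*
 * Illustrative sketch (not part of the original driver): a TX queue
 * configuration that satisfies the "simple" path conditions listed above.
 * The function name and the descriptor count are hypothetical examples,
 * not recommendations.
 */
static inline int
ixgbe_example_simple_txq_setup(uint8_t port_id, uint16_t queue_id,
			       unsigned int socket_id)
{
	struct rte_eth_txconf tx_conf = {
		.txq_flags = IXGBE_SIMPLE_FLAGS,
		/* must be >= RTE_PMD_IXGBE_TX_MAX_BURST for the simple path */
		.tx_rs_thresh = RTE_PMD_IXGBE_TX_MAX_BURST,
		.tx_free_thresh = 32,
	};

	/* 512 descriptors: a multiple of tx_rs_thresh, as required */
	return rte_eth_tx_queue_setup(port_id, queue_id, 512, socket_id,
				      &tx_conf);
}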

/*
 * Check for descriptors with their DD bit set and free mbufs.
 * Return the total number of buffers freed.
 */
static inline int __attribute__((always_inline))
ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *txep;
	uint32_t status;
	int i;

	/* check DD bit on threshold descriptor */
	status = txq->tx_ring[txq->tx_next_dd].wb.status;
	if (! (status & IXGBE_ADVTXD_STAT_DD))
		return 0;

	/*
	 * first buffer to free from the S/W ring is at index
	 * tx_next_dd - (tx_rs_thresh - 1)
	 */
	txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

	/* prefetch the mbufs that are about to be freed */
	for (i = 0; i < txq->tx_rs_thresh; ++i)
		rte_prefetch0((txep + i)->mbuf);

	/* free buffers one at a time */
	if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
			rte_mempool_put(txep->mbuf->pool, txep->mbuf);
			txep->mbuf = NULL;
		}
	} else {
		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
			rte_pktmbuf_free_seg(txep->mbuf);
			txep->mbuf = NULL;
		}
	}

	/* buffers were freed, update counters */
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
	if (txq->tx_next_dd >= txq->nb_tx_desc)
		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

	return txq->tx_rs_thresh;
}

/*
 * Populate descriptors with the following info:
 *  1.) buffer_addr = phys_addr + headroom
 *  2.) cmd_type_len = DCMD_DTYP_FLAGS | pkt_len
 *  3.) olinfo_status = pkt_len << PAYLEN_SHIFT
 */

/* Defines for Tx descriptor */
#define DCMD_DTYP_FLAGS (IXGBE_ADVTXD_DTYP_DATA |\
			 IXGBE_ADVTXD_DCMD_IFCS |\
			 IXGBE_ADVTXD_DCMD_DEXT |\
			 IXGBE_ADVTXD_DCMD_EOP)

/* Populate 4 descriptors with data from 4 mbufs */
static inline void
tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
	uint64_t buf_dma_addr;
	uint32_t pkt_len;
	int i;

	for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
		buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
		pkt_len = (*pkts)->pkt.data_len;

		/* write data to descriptor */
		txdp->read.buffer_addr = buf_dma_addr;
		txdp->read.cmd_type_len =
			((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
		txdp->read.olinfo_status =
			(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
	}
}

/* Populate 1 descriptor with data from 1 mbuf */
static inline void
tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
	uint64_t buf_dma_addr;
	uint32_t pkt_len;

	buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
	pkt_len = (*pkts)->pkt.data_len;

	/* write data to descriptor */
	txdp->read.buffer_addr = buf_dma_addr;
	txdp->read.cmd_type_len =
		((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
	txdp->read.olinfo_status =
		(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
}

/*
 * Fill H/W descriptor ring with mbuf data.
 * Copy mbuf pointers to the S/W ring.
 */
static inline void
ixgbe_tx_fill_hw_ring(struct igb_tx_queue *txq, struct rte_mbuf **pkts,
		      uint16_t nb_pkts)
{
	volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
	struct igb_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
	const int N_PER_LOOP = 4;
	const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
	int mainpart, leftover;
	int i, j;

	/*
	 * Process most of the packets in chunks of N pkts. Any
	 * leftover packets will get processed one at a time.
	 */
	mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
	leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
	for (i = 0; i < mainpart; i += N_PER_LOOP) {
		/* Copy N mbuf pointers to the S/W ring */
		for (j = 0; j < N_PER_LOOP; ++j) {
			(txep + i + j)->mbuf = *(pkts + i + j);
		}
		tx4(txdp + i, pkts + i);
	}

	if (unlikely(leftover > 0)) {
		for (i = 0; i < leftover; ++i) {
			(txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
			tx1(txdp + mainpart + i, pkts + mainpart + i);
		}
	}
}
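
/*
 * Worked example (illustrative, not from the original source): with
 * nb_pkts = 23 and N_PER_LOOP = 4, mainpart = 20 (five tx4() calls)
 * and leftover = 3 (three tx1() calls).
 */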
static inline uint16_t
tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	     uint16_t nb_pkts)
{
	struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
	volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
	uint16_t n = 0;

	/*
	 * Begin scanning the H/W ring for done descriptors when the
	 * number of available descriptors drops below tx_free_thresh. For
	 * each done descriptor, free the associated buffer.
	 */
	if (txq->nb_tx_free < txq->tx_free_thresh)
		ixgbe_tx_free_bufs(txq);

	/* Only use descriptors that are available */
	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	/* Use exactly nb_pkts descriptors */
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	/*
	 * At this point, we know there are enough descriptors in the
	 * ring to transmit all the packets. This assumes that each
	 * mbuf contains a single segment, and that no new offloads
	 * are expected, which would require a new context descriptor.
	 */

	/*
	 * See if we're going to wrap-around. If so, handle the top
	 * of the descriptor ring first, then do the bottom. If not,
	 * the processing looks just like the "bottom" part anyway...
	 */
	if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
		n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
		ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);

		/*
		 * We know that the last descriptor in the ring will need to
		 * have its RS bit set because tx_rs_thresh has to be
		 * a divisor of the ring size.
		 */
		tx_r[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		txq->tx_tail = 0;
	}

	/* Fill H/W descriptor ring with mbuf data */
	ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
	txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));

	/*
	 * Determine if the RS bit should be set.
	 * This is what we actually want:
	 *     if ((txq->tx_tail - 1) >= txq->tx_next_rs)
	 * but instead of subtracting 1 and doing >=, we can just do
	 * greater than without subtracting.
	 */
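	/*
	 * Worked example (illustrative, not from the original source):
	 * with tx_next_rs = 31, filling descriptors up to tx_tail = 32
	 * makes tx_tail > tx_next_rs, so RS is set on descriptor 31.
	 */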
	if (txq->tx_tail > txq->tx_next_rs) {
		tx_r[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
			txq->tx_rs_thresh);
		if (txq->tx_next_rs >= txq->nb_tx_desc)
			txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
	}

	/*
	 * Check for wrap-around. This would only happen if we used
	 * up to the last descriptor in the ring, no more, no less.
	 */
	if (txq->tx_tail >= txq->nb_tx_desc)
		txq->tx_tail = 0;

	/* update tail pointer */
	rte_wmb();
	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

	return nb_pkts;
}

static uint16_t
ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
		       uint16_t nb_pkts)
{
	uint16_t nb_tx;

	/* Try to transmit at least chunks of TX_MAX_BURST pkts */
	if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
		return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);

	/* transmit more than the max burst, in chunks of TX_MAX_BURST */
	nb_tx = 0;
	while (nb_pkts) {
		uint16_t ret, n;
		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
		ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
		nb_tx = (uint16_t)(nb_tx + ret);
		nb_pkts = (uint16_t)(nb_pkts - ret);
		if (ret < n)
			break;
	}

	return nb_tx;
}
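
/*
 * Illustrative usage sketch (not part of the original driver): an
 * application-level helper that keeps calling the generic burst API until
 * all packets have been queued. The helper name is hypothetical.
 */
static inline void
ixgbe_example_tx_flush(uint8_t port_id, uint16_t queue_id,
		       struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = 0;

	/* retry until the queue has accepted every packet */
	while (sent < nb_pkts)
		sent = (uint16_t)(sent + rte_eth_tx_burst(port_id, queue_id,
				&pkts[sent], (uint16_t)(nb_pkts - sent)));
}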

static inline void
ixgbe_set_xmit_ctx(struct igb_tx_queue *txq,
		volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
		uint16_t ol_flags, uint32_t vlan_macip_lens)
{
	uint32_t type_tucmd_mlhl;
	uint32_t mss_l4len_idx;
	uint32_t ctx_idx;
	uint32_t cmp_mask;

	ctx_idx = txq->ctx_curr;
	cmp_mask = 0;
	type_tucmd_mlhl = 0;

	if (ol_flags & PKT_TX_VLAN_PKT) {
		cmp_mask |= TX_VLAN_CMP_MASK;
	}

	if (ol_flags & PKT_TX_IP_CKSUM) {
		type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
		cmp_mask |= TX_MAC_LEN_CMP_MASK;
	}

	/* Specify which HW CTX to upload. */
	mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
	switch (ol_flags & PKT_TX_L4_MASK) {
	case PKT_TX_UDP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
			IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_TCP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
			IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_SCTP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
			IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	default:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
			IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		break;
	}

	txq->ctx_cache[ctx_idx].flags = ol_flags;
	txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
	txq->ctx_cache[ctx_idx].vlan_macip_lens.data =
		vlan_macip_lens & cmp_mask;

	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
	ctx_txd->seqnum_seed = 0;
}

/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
		   uint32_t vlan_macip_lens)
{
	/* If it matches the currently used context */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
		return txq->ctx_curr;
	}

	/* Otherwise, check whether it matches the other cached context */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
		return txq->ctx_curr;
	}

	/* No match: signal that a new context descriptor must be built */
	return (IXGBE_CTX_NUM);
}

static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
{
	static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
	static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
	uint32_t tmp;

	tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
	return tmp;
}

static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
{
	static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
	return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
}

/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Reset transmit descriptors after they have been used */
static int
ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *sw_ring = txq->sw_ring;
	volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
	uint16_t nb_tx_desc = txq->nb_tx_desc;
	uint16_t desc_to_clean_to;
	uint16_t nb_tx_to_clean;

	/* Determine the last descriptor needing to be cleaned */
	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
	if (desc_to_clean_to >= nb_tx_desc)
		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

	/* Check to make sure the last descriptor to clean is done */
	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
	if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD)) {
		PMD_TX_FREE_LOG(DEBUG,
				"TX descriptor %4u is not done "
				"(port=%d queue=%d)",
				desc_to_clean_to,
				txq->port_id, txq->queue_id);
		/* Failed to clean any descriptors, better luck next time */
		return -(1);
	}

	/* Figure out how many descriptors will be cleaned */
	if (last_desc_cleaned > desc_to_clean_to)
		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
					    desc_to_clean_to);
	else
		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
					    last_desc_cleaned);

	PMD_TX_FREE_LOG(DEBUG,
			"Cleaning %4u TX descriptors: %4u to %4u "
			"(port=%d queue=%d)",
			nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
			txq->port_id, txq->queue_id);

	/*
	 * The last descriptor to clean is done, so that means all the
	 * descriptors from the last descriptor that was cleaned
	 * up to the last descriptor with the RS bit set
	 * are done. Only reset the threshold descriptor.
	 */
	txr[desc_to_clean_to].wb.status = 0;

	/* Update the txq to reflect the last descriptor that was cleaned */
	txq->last_desc_cleaned = desc_to_clean_to;
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);

	/* No Error */
	return 0;
}

uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct igb_tx_queue *txq;
	struct igb_tx_entry *sw_ring;
	struct igb_tx_entry *txe, *txn;
	volatile union ixgbe_adv_tx_desc *txr;
	volatile union ixgbe_adv_tx_desc *txd;
	struct rte_mbuf *tx_pkt;
	struct rte_mbuf *m_seg;
	uint64_t buf_dma_addr;
	uint32_t olinfo_status;
	uint32_t cmd_type_len;
	uint32_t pkt_len;
	uint16_t slen;
	uint16_t ol_flags;
	uint16_t tx_id;
	uint16_t tx_last;
	uint16_t nb_tx;
	uint16_t nb_used;
	uint16_t tx_ol_req;
	uint32_t vlan_macip_lens;
	uint32_t ctx = 0;
	uint32_t new_ctx;

	txq = tx_queue;
	sw_ring = txq->sw_ring;
	txr = txq->tx_ring;
	tx_id = txq->tx_tail;
	txe = &sw_ring[tx_id];

	/* Determine if the descriptor ring needs to be cleaned. */
	if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
		ixgbe_xmit_cleanup(txq);
	}

	/* TX loop */
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		new_ctx = 0;
		tx_pkt = *tx_pkts++;
		pkt_len = tx_pkt->pkt.pkt_len;

		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

		/*
		 * Determine how many (if any) context descriptors
		 * are needed for offload functionality.
		 */
		ol_flags = tx_pkt->ol_flags;
		vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;

		/* If hardware offload required */
		tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
		if (tx_ol_req) {
			/* Decide whether a new context descriptor is needed,
			 * or an existing one can be reused. */
			ctx = what_advctx_update(txq, tx_ol_req,
				vlan_macip_lens);
			/* Only allocate context descriptor if required */
			new_ctx = (ctx == IXGBE_CTX_NUM);
			ctx = txq->ctx_curr;
		}

		/*
		 * Keep track of how many descriptors are used this loop.
		 * This will always be the number of segments + the number of
		 * Context descriptors required to transmit the packet.
		 */
		nb_used = (uint16_t)(tx_pkt->pkt.nb_segs + new_ctx);

		/*
		 * The number of descriptors that must be allocated for a
		 * packet is the number of segments of that packet, plus 1
		 * Context Descriptor for the hardware offload, if any.
		 * Determine the last TX descriptor to allocate in the TX ring
		 * for the packet, starting from the current position (tx_id)
		 * in the ring.
		 */
		tx_last = (uint16_t) (tx_id + nb_used - 1);

		/* Circular ring */
		if (tx_last >= txq->nb_tx_desc)
			tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
			   " tx_first=%u tx_last=%u\n",
			   (unsigned) txq->port_id,
			   (unsigned) txq->queue_id,
			   (unsigned) pkt_len,
			   (unsigned) tx_id,
			   (unsigned) tx_last);

		/*
		 * Make sure there are enough TX descriptors available to
		 * transmit the entire packet.
		 * nb_used better be less than or equal to txq->tx_rs_thresh
		 */
		if (nb_used > txq->nb_tx_free) {
			PMD_TX_FREE_LOG(DEBUG,
					"Not enough free TX descriptors "
					"nb_used=%4u nb_free=%4u "
					"(port=%d queue=%d)",
					nb_used, txq->nb_tx_free,
					txq->port_id, txq->queue_id);

			if (ixgbe_xmit_cleanup(txq) != 0) {
				/* Could not clean any descriptors */
				if (nb_tx == 0)
					return 0;
				goto end_of_tx;
			}

			/* nb_used better be <= txq->tx_rs_thresh */
			if (unlikely(nb_used > txq->tx_rs_thresh)) {
				PMD_TX_FREE_LOG(DEBUG,
					"The number of descriptors needed to "
					"transmit the packet exceeds the "
					"RS bit threshold. This will impact "
					"performance. "
					"nb_used=%4u nb_free=%4u "
					"tx_rs_thresh=%4u "
					"(port=%d queue=%d)",
					nb_used, txq->nb_tx_free,
					txq->tx_rs_thresh,
					txq->port_id, txq->queue_id);
				/*
				 * Loop here until there are enough TX
				 * descriptors or until the ring cannot be
				 * cleaned.
				 */
				while (nb_used > txq->nb_tx_free) {
					if (ixgbe_xmit_cleanup(txq) != 0) {
						/*
						 * Could not clean any
						 * descriptors
						 */
						if (nb_tx == 0)
							return 0;
						goto end_of_tx;
					}
				}
			}
		}

		/*
		 * By now there are enough free TX descriptors to transmit
		 * the packet.
		 */

		/*
		 * Set common flags of all TX Data Descriptors.
		 *
		 * The following bits must be set in all Data Descriptors:
		 *   - IXGBE_ADVTXD_DTYP_DATA
		 *   - IXGBE_ADVTXD_DCMD_DEXT
		 *
		 * The following bits must be set in the first Data Descriptor
		 * and are ignored in the other ones:
		 *   - IXGBE_ADVTXD_DCMD_IFCS
		 *   - IXGBE_ADVTXD_MAC_1588
		 *   - IXGBE_ADVTXD_DCMD_VLE
		 *
		 * The following bits must only be set in the last Data
		 * Descriptor:
		 *   - IXGBE_TXD_CMD_EOP
		 *
		 * The following bits can be set in any Data Descriptor, but
		 * are only set in the last Data Descriptor:
		 *   - IXGBE_TXD_CMD_RS
		 */
		cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
			IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
		olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
#ifdef RTE_LIBRTE_IEEE1588
		if (ol_flags & PKT_TX_IEEE1588_TMST)
			cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif

		if (tx_ol_req) {
			/*
			 * Setup the TX Advanced Context Descriptor if required
			 */
			if (new_ctx) {
				volatile struct ixgbe_adv_tx_context_desc *
					ctx_txd;

				ctx_txd = (volatile struct
					ixgbe_adv_tx_context_desc *)
					&txr[tx_id];

				txn = &sw_ring[txe->next_id];
				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);

				if (txe->mbuf != NULL) {
					rte_pktmbuf_free_seg(txe->mbuf);
					txe->mbuf = NULL;
				}

				ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
					vlan_macip_lens);

				txe->last_id = tx_last;
				tx_id = txe->next_id;
				txe = txn;
			}

			/*
			 * Setup the TX Advanced Data Descriptor.
			 * This path is taken whether the context descriptor
			 * was newly built or reused.
			 */
			cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
			olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
			olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
		}

		m_seg = tx_pkt;
		do {
			txd = &txr[tx_id];
			txn = &sw_ring[txe->next_id];

			if (txe->mbuf != NULL)
				rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = m_seg;

			/*
			 * Set up Transmit Data Descriptor.
			 */
			slen = m_seg->pkt.data_len;
			buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
			txd->read.buffer_addr =
				rte_cpu_to_le_64(buf_dma_addr);
			txd->read.cmd_type_len =
				rte_cpu_to_le_32(cmd_type_len | slen);
			txd->read.olinfo_status =
				rte_cpu_to_le_32(olinfo_status);
			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
			m_seg = m_seg->pkt.next;
		} while (m_seg != NULL);

		/*
		 * The last packet data descriptor needs End Of Packet (EOP)
		 */
		cmd_type_len |= IXGBE_TXD_CMD_EOP;
		txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);

		/* Set RS bit only on threshold packets' last descriptor */
		if (txq->nb_tx_used >= txq->tx_rs_thresh) {
			PMD_TX_FREE_LOG(DEBUG,
					"Setting RS bit on TXD id="
					"%4u (port=%d queue=%d)",
					tx_last, txq->port_id, txq->queue_id);

			cmd_type_len |= IXGBE_TXD_CMD_RS;

			/* Update txq RS bit counters */
			txq->nb_tx_used = 0;
		}
		txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
	}

end_of_tx:
	rte_wmb();

	/*
	 * Set the Transmit Descriptor Tail (TDT)
	 */
	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
		   (unsigned) txq->port_id, (unsigned) txq->queue_id,
		   (unsigned) tx_id, (unsigned) nb_tx);
	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
	txq->tx_tail = tx_id;

	return nb_tx;
}

/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
static inline uint16_t
rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
{
	uint16_t pkt_flags;

	static uint16_t ip_pkt_types_map[16] = {
		0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
		PKT_RX_IPV6_HDR, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
	};

	static uint16_t ip_rss_types_map[16] = {
		0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
		0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
		PKT_RX_RSS_HASH, 0, 0, 0,
		0, 0, 0, PKT_RX_FDIR,
	};

#ifdef RTE_LIBRTE_IEEE1588
	static uint32_t ip_pkt_etqf_map[8] = {
		0, 0, 0, PKT_RX_IEEE1588_PTP,
		0, 0, 0, 0,
	};

	pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
				ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#else
	pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#endif
	return (uint16_t)(pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
}

static inline uint16_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
	uint16_t pkt_flags;

	/*
	 * Check only whether a VLAN tag is present.
	 * Whether the L3/L4 RX checksums were computed by the NIC is not
	 * checked here; that can be found from the
	 * rte_eth_rxmode.hw_ip_checksum flag.
	 */
	pkt_flags = (uint16_t)((rx_status & IXGBE_RXD_STAT_VP) ?
				PKT_RX_VLAN_PKT : 0);

#ifdef RTE_LIBRTE_IEEE1588
	if (rx_status & IXGBE_RXD_STAT_TMST)
		pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
#endif
	return pkt_flags;
}

static inline uint16_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
	/*
	 * Bit 31: IPE, IPv4 checksum error
	 * Bit 30: L4I, L4 integrity error
	 */
	static uint16_t error_to_pkt_flags_map[4] = {
		0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
	};
	return error_to_pkt_flags_map[(rx_status >>
		IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
}
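
/*
 * Worked example (illustrative, not from the original source): the shift
 * and mask reduce the two error bits to a table index 0..3. If only bit 30
 * is set the index is 1 (PKT_RX_L4_CKSUM_BAD); if both bits are set the
 * index is 3 (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD).
 */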

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
/*
 * LOOK_AHEAD defines how many descriptor statuses to check beyond the
 * current descriptor.
 * It must be a compile-time constant (#define) for optimal performance.
 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
 * function only works with LOOK_AHEAD=8.
 */
#define LOOK_AHEAD 8
#if (LOOK_AHEAD != 8)
#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
#endif

static inline int
ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *rxep;
	struct rte_mbuf *mb;
	uint16_t pkt_len;
	int s[LOOK_AHEAD], nb_dd;
	int i, j, nb_rx = 0;

	/* get references to current descriptor and S/W ring entry */
	rxdp = &rxq->rx_ring[rxq->rx_tail];
	rxep = &rxq->sw_ring[rxq->rx_tail];

	/* check to make sure there is at least 1 packet to receive */
	if (! (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
		return 0;

	/*
	 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
	 * reference packets that are ready to be received.
	 */
	for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
	     i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
	{
		/* Read desc statuses backwards to avoid race condition */
		for (j = LOOK_AHEAD-1; j >= 0; --j)
			s[j] = rxdp[j].wb.upper.status_error;

		/* Clear everything but the status bits (LSB) */
		for (j = 0; j < LOOK_AHEAD; ++j)
			s[j] &= IXGBE_RXDADV_STAT_DD;

		/* Compute how many status bits were set */
		nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
		nb_rx += nb_dd;
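		/*
		 * Note (illustrative, not from the original source): the DD
		 * flag is bit 0 of the status word, so after the masking
		 * above each s[j] is either 0 or 1 and the sum counts the
		 * completed descriptors. Descriptors complete in order, so
		 * these are always the first nb_dd of the LOOK_AHEAD group.
		 */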

		/* Translate descriptor info to mbuf format */
		for (j = 0; j < nb_dd; ++j) {
			mb = rxep[j].mbuf;
			pkt_len = (uint16_t)(rxdp[j].wb.upper.length -
					     rxq->crc_len);
			mb->pkt.data_len = pkt_len;
			mb->pkt.pkt_len = pkt_len;
			mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan;
			mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss;

			/* convert descriptor fields to rte mbuf flags */
			mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags(
					rxdp[j].wb.lower.lo_dword.data);
			/* reuse status field from scan list */
			mb->ol_flags = (uint16_t)(mb->ol_flags |
					rx_desc_status_to_pkt_flags(s[j]));
			mb->ol_flags = (uint16_t)(mb->ol_flags |
					rx_desc_error_to_pkt_flags(s[j]));
		}

		/* Move mbuf pointers from the S/W ring to the stage */
		for (j = 0; j < LOOK_AHEAD; ++j) {
			rxq->rx_stage[i + j] = rxep[j].mbuf;
		}

		/* stop if all requested packets could not be received */
		if (nb_dd != LOOK_AHEAD)
			break;
	}

	/* clear software ring entries so we can cleanup correctly */
	for (i = 0; i < nb_rx; ++i) {
		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
	}

	return nb_rx;
}

static inline int
ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *rxep;
	struct rte_mbuf *mb;
	uint16_t alloc_idx;
	uint64_t dma_addr;
	int diag, i;

	/* allocate buffers in bulk directly into the S/W ring */
	alloc_idx = (uint16_t)(rxq->rx_free_trigger -
			       (rxq->rx_free_thresh - 1));
	rxep = &rxq->sw_ring[alloc_idx];
	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
				    rxq->rx_free_thresh);
	if (unlikely(diag != 0))
		return (-ENOMEM);

	rxdp = &rxq->rx_ring[alloc_idx];
	for (i = 0; i < rxq->rx_free_thresh; ++i) {
		/* populate the static rte mbuf fields */
		mb = rxep[i].mbuf;
		rte_mbuf_refcnt_set(mb, 1);
		mb->type = RTE_MBUF_PKT;
		mb->pkt.next = NULL;
		mb->pkt.data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
		mb->pkt.nb_segs = 1;
		mb->pkt.in_port = rxq->port_id;

		/* populate the descriptors */
		dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
		rxdp[i].read.hdr_addr = dma_addr;
		rxdp[i].read.pkt_addr = dma_addr;
	}

	/* update tail pointer */
	rte_wmb();
	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);

	/* update state of internal queue structure */
	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
					  rxq->rx_free_thresh);
	if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
		rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);

	/* no errors */
	return 0;
}

static inline uint16_t
ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
			 uint16_t nb_pkts)
{
	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
	int i;

	/* how many packets are ready to return? */
	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);

	/* copy mbuf pointers to the application's packet list */
	for (i = 0; i < nb_pkts; ++i)
		rx_pkts[i] = stage[i];

	/* update internal queue state */
	rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
	rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);

	return nb_pkts;
}

static inline uint16_t
rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	     uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
	uint16_t nb_rx = 0;

	/* Any previously recv'd pkts will be returned from the Rx stage */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

	/* Scan the H/W ring for packets to receive */
	nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);

	/* update internal queue state */
	rxq->rx_next_avail = 0;
	rxq->rx_nb_avail = nb_rx;
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);

	/* if required, allocate new buffers to replenish descriptors */
	if (rxq->rx_tail > rxq->rx_free_trigger) {
		if (ixgbe_rx_alloc_bufs(rxq) != 0) {
			uint16_t i, j;

			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);

			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
				rxq->rx_free_thresh;

			/*
			 * Need to rewind any previous receives if we cannot
			 * allocate new buffers to replenish the old ones.
			 */
			rxq->rx_nb_avail = 0;
			rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
			for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];

			return 0;
		}
	}

	if (rxq->rx_tail >= rxq->nb_rx_desc)
		rxq->rx_tail = 0;

	/* received any packets this loop? */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

	return 0;
}

/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
uint16_t
ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
			   uint16_t nb_pkts)
{
	uint16_t nb_rx;

	if (unlikely(nb_pkts == 0))
		return 0;

	if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);

	/* request is relatively large, chunk it up */
	nb_rx = 0;
	while (nb_pkts) {
		uint16_t ret, n;
		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
		ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
		nb_rx = (uint16_t)(nb_rx + ret);
		nb_pkts = (uint16_t)(nb_pkts - ret);
		if (ret < n)
			break;
	}

	return nb_rx;
}
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */

uint16_t
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union ixgbe_adv_rx_desc *rx_ring;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union ixgbe_adv_rx_desc rxd;
	uint64_t dma_addr;
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t pkt_len;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;
	while (nb_rx < nb_pkts) {
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * End of packet.
		 *
		 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
		 * is likely to be invalid and to be dropped by the various
		 * validation checks performed by the network stack.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "ext_err_stat=0x%08x pkt_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_ixgbe_prefetch(&rx_ring[rx_id]);
			rte_ixgbe_prefetch(&sw_ring[rx_id]);
		}

		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma_addr =
			rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.hdr_addr = dma_addr;
		rxdp->read.pkt_addr = dma_addr;

		/*
		 * Initialize the returned mbuf.
		 * 1) setup generic mbuf fields:
		 *    - number of segments,
		 *    - next segment,
		 *    - packet length,
		 *    - RX port identifier.
		 * 2) integrate hardware offload data, if any:
		 *    - RSS flag & hash,
		 *    - IP checksum flag,
		 *    - VLAN TCI, if any,
		 *    - error flags.
		 */
		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
				      rxq->crc_len);
		rxm->pkt.data = (char *) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
		rte_packet_prefetch(rxm->pkt.data);
		rxm->pkt.nb_segs = 1;
		rxm->pkt.next = NULL;
		rxm->pkt.pkt_len = pkt_len;
		rxm->pkt.data_len = pkt_len;
		rxm->pkt.in_port = rxq->port_id;

		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		/* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
		rxm->pkt.vlan_macip.f.vlan_tci =
			rte_le_to_cpu_16(rxd.wb.upper.vlan);

		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		rxm->ol_flags = pkt_flags;

		if (likely(pkt_flags & PKT_RX_RSS_HASH))
			rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
		else if (pkt_flags & PKT_RX_FDIR) {
			rxm->pkt.hash.fdir.hash =
				(uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
					   & IXGBE_ATR_HASH_MASK);
			rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
		}

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = rxm;
	}
	rxq->rx_tail = rx_id;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
				    (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}

uint16_t
ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			  uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union ixgbe_adv_rx_desc *rx_ring;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *first_seg;
	struct rte_mbuf *last_seg;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union ixgbe_adv_rx_desc rxd;
	uint64_t dma; /* Physical address of mbuf data buffer */
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t data_len;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;

	/*
	 * Retrieve RX context of current packet, if any.
	 */
	first_seg = rxq->pkt_first_seg;
	last_seg = rxq->pkt_last_seg;

	while (nb_rx < nb_pkts) {
	next_desc:
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * Descriptor done.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x data_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_ixgbe_prefetch(&rx_ring[rx_id]);
			rte_ixgbe_prefetch(&sw_ring[rx_id]);
		}

		/*
		 * Update RX descriptor with the physical address of the new
		 * data buffer of the new allocated mbuf.
		 */
		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.hdr_addr = dma;
		rxdp->read.pkt_addr = dma;

		/*
		 * Set data length & data buffer address of mbuf.
		 */
		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
		rxm->pkt.data_len = data_len;
		rxm->pkt.data = (char *) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (first_seg == NULL) {
			first_seg = rxm;
			first_seg->pkt.pkt_len = data_len;
			first_seg->pkt.nb_segs = 1;
		} else {
			first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
					+ data_len);
			first_seg->pkt.nb_segs++;
			last_seg->pkt.next = rxm;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
			last_seg = rxm;
			goto next_desc;
		}

		/*
		 * This is the last buffer of the received packet.
		 * If the CRC is not stripped by the hardware:
		 *   - Subtract the CRC length from the total packet length.
		 *   - If the last buffer only contains the whole CRC or a part
		 *     of it, free the mbuf associated to the last buffer.
		 *     If part of the CRC is also contained in the previous
		 *     mbuf, subtract the length of that CRC part from the
		 *     data length of the previous mbuf.
		 */
		rxm->pkt.next = NULL;
		if (unlikely(rxq->crc_len > 0)) {
			first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
			if (data_len <= ETHER_CRC_LEN) {
				rte_pktmbuf_free_seg(rxm);
				first_seg->pkt.nb_segs--;
				last_seg->pkt.data_len = (uint16_t)
					(last_seg->pkt.data_len -
					 (ETHER_CRC_LEN - data_len));
				last_seg->pkt.next = NULL;
			} else
				rxm->pkt.data_len =
					(uint16_t) (data_len - ETHER_CRC_LEN);
		}

		/*
		 * Initialize the first mbuf of the returned packet:
		 *    - RX port identifier,
		 *    - hardware offload data, if any:
		 *      - RSS flag & hash,
		 *      - IP checksum flag,
		 *      - VLAN TCI, if any,
		 *      - error flags.
		 */
		first_seg->pkt.in_port = rxq->port_id;

		/*
		 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
		 * set in the pkt_flags field.
		 */
		first_seg->pkt.vlan_macip.f.vlan_tci =
			rte_le_to_cpu_16(rxd.wb.upper.vlan);
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		first_seg->ol_flags = pkt_flags;

		if (likely(pkt_flags & PKT_RX_RSS_HASH))
			first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
		else if (pkt_flags & PKT_RX_FDIR) {
			first_seg->pkt.hash.fdir.hash =
				(uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
					   & IXGBE_ATR_HASH_MASK);
			first_seg->pkt.hash.fdir.id =
				rxd.wb.lower.hi_dword.csum_ip.ip_id;
		}

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch(first_seg->pkt.data);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	/*
	 * Record index of the next RX descriptor to probe.
	 */
	rxq->rx_tail = rx_id;

	/*
	 * Save receive context.
	 */
	rxq->pkt_first_seg = first_seg;
	rxq->pkt_last_seg = last_seg;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
				    (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}

/*********************************************************************
 *
 *  Queue management functions
 *
 **********************************************************************/

/*
 * Rings setup and release.
 *
 * TDBA/RDBA should be aligned on a 16-byte boundary, but TDLEN/RDLEN must
 * be a multiple of 128 bytes. So we align TDBA/RDBA on a 128-byte boundary.
 * This also optimizes cache line size effects. The hardware supports cache
 * line sizes of up to 128 bytes.
 */
#define IXGBE_ALIGN 128

/*
 * Maximum number of Ring Descriptors.
 *
 * Since RDLEN/TDLEN must be a multiple of 128 bytes, the number of ring
 * descriptors must satisfy the following condition:
 *   (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
 */
#define IXGBE_MIN_RING_DESC 64
#define IXGBE_MAX_RING_DESC 4096
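
/*
 * Worked example (illustrative, not from the original source): both RX and
 * TX descriptors are 16 bytes, so the alignment condition above reduces to
 * the descriptor count being a multiple of 8, within [64, 4096].
 */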

/*
 * Create a memzone for the HW rings. malloc cannot be used as the physical
 * address is needed. If the memzone is already created, this function
 * returns a pointer to the existing one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		     dev->driver->pci_drv.name, ring_name,
		     dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
			socket_id, 0, IXGBE_ALIGN);
}

static void
ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
	unsigned i;

	if (txq->sw_ring != NULL) {
		for (i = 0; i < txq->nb_tx_desc; i++) {
			if (txq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
				txq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
ixgbe_tx_queue_release(struct igb_tx_queue *txq)
{
	if (txq != NULL) {
		ixgbe_tx_queue_release_mbufs(txq);
		rte_free(txq->sw_ring);
		rte_free(txq);
	}
}

void
ixgbe_dev_tx_queue_release(void *txq)
{
	ixgbe_tx_queue_release(txq);
}

/* (Re)set dynamic igb_tx_queue fields to defaults */
static void
ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *txe = txq->sw_ring;
	uint16_t prev, i;

	/* Zero out HW ring memory */
	for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
		((volatile char *)txq->tx_ring)[i] = 0;
	}

	/* Initialize SW ring entries */
	prev = (uint16_t) (txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
		txd->wb.status = IXGBE_TXD_STAT_DD;
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
	txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

	txq->tx_tail = 0;
	txq->nb_tx_used = 0;
	/*
	 * Always allow 1 descriptor to be un-allocated to avoid
	 * a H/W race condition
	 */
	txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
	txq->ctx_curr = 0;
	memset((void *)&txq->ctx_cache, 0,
	       IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
}

int
ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	const struct rte_memzone *tz;
	struct igb_tx_queue *txq;
	struct ixgbe_hw *hw;
	uint16_t tx_rs_thresh, tx_free_thresh;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of transmit descriptors.
	 * It must not exceed the hardware maximum and must be a multiple
	 * of IXGBE_ALIGN.
	 */
	if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
	    (nb_desc > IXGBE_MAX_RING_DESC) ||
	    (nb_desc < IXGBE_MIN_RING_DESC)) {
		return -EINVAL;
	}

	/*
	 * The following two parameters control the setting of the RS bit on
	 * transmit descriptors.
	 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
	 * descriptors have been used.
	 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
	 * descriptors are used or if the number of descriptors required
	 * to transmit a packet is greater than the number of free TX
	 * descriptors.
	 * The following constraints must be satisfied:
	 *  - tx_rs_thresh must be greater than 0.
	 *  - tx_rs_thresh must be less than the size of the ring minus 2.
	 *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
	 *  - tx_rs_thresh must be a divisor of the ring size.
	 *  - tx_free_thresh must be greater than 0.
	 *  - tx_free_thresh must be less than the size of the ring minus 3.
	 * One descriptor in the TX ring is used as a sentinel to avoid a
	 * H/W race condition, hence the maximum threshold constraints.
	 * When set to zero, the default values are used.
	 */
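	/*
	 * Worked example (illustrative, not from the original source): with
	 * nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 64 satisfy
	 * every constraint above: 32 < 510, 32 <= 64, 512 % 32 == 0, and
	 * 64 < 509.
	 */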
	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
			tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
			tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
	if (tx_rs_thresh >= (nb_desc - 2)) {
		RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than the number "
			"of TX descriptors minus 2. (tx_rs_thresh=%u port=%d "
			"queue=%d)\n", (unsigned int)tx_rs_thresh,
			(int)dev->data->port_id, (int)queue_idx);
		return -(EINVAL);
	}
	if (tx_free_thresh >= (nb_desc - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			"number of TX descriptors minus 3. (tx_free_thresh=%u "
			"port=%d queue=%d)\n", (unsigned int)tx_free_thresh,
			(int)dev->data->port_id, (int)queue_idx);
		return -(EINVAL);
	}
	if (tx_rs_thresh > tx_free_thresh) {
		RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than or equal to "
			"tx_free_thresh. (tx_free_thresh=%u tx_rs_thresh=%u "
			"port=%d queue=%d)\n", (unsigned int)tx_free_thresh,
			(unsigned int)tx_rs_thresh, (int)dev->data->port_id,
			(int)queue_idx);
		return -(EINVAL);
	}
	if ((nb_desc % tx_rs_thresh) != 0) {
		RTE_LOG(ERR, PMD, "tx_rs_thresh must be a divisor of the "
			"number of TX descriptors. (tx_rs_thresh=%u port=%d "
			"queue=%d)\n", (unsigned int)tx_rs_thresh,
			(int)dev->data->port_id, (int)queue_idx);
		return -(EINVAL);
	}

	/*
	 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
	 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
	 * by the NIC and all descriptors are written back after the NIC
	 * accumulates WTHRESH descriptors.
	 */
	if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
		RTE_LOG(ERR, PMD, "TX WTHRESH must be set to 0 if "
			"tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
			"port=%d queue=%d)\n", (unsigned int)tx_rs_thresh,
			(int)dev->data->port_id, (int)queue_idx);
		return -(EINVAL);
	}

	/* Free memory prior to re-allocation if needed... */
	if (dev->data->tx_queues[queue_idx] != NULL)
		ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);

	/* First allocate the tx queue data structure */
	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct igb_tx_queue),
				 CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		return (-ENOMEM);

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
			sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
			socket_id);
	if (tz == NULL) {
		ixgbe_tx_queue_release(txq);
		return (-ENOMEM);
	}

	txq->nb_tx_desc = nb_desc;
	txq->tx_rs_thresh = tx_rs_thresh;
	txq->tx_free_thresh = tx_free_thresh;
	txq->pthresh = tx_conf->tx_thresh.pthresh;
	txq->hthresh = tx_conf->tx_thresh.hthresh;
	txq->wthresh = tx_conf->tx_thresh.wthresh;
	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->txq_flags = tx_conf->txq_flags;

	/*
	 * Use VFTDT (instead of TDT) if a virtual function is detected.
	 */
	if (hw->mac.type == ixgbe_mac_82599_vf)
		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
	else
		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));

	txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
	txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;

	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
					  sizeof(struct igb_tx_entry) * nb_desc,
					  CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL) {
		ixgbe_tx_queue_release(txq);
		return (-ENOMEM);
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
		     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

	ixgbe_reset_tx_queue(txq);

	dev->data->tx_queues[queue_idx] = txq;

	/* Use a simple TX queue (no offloads, no multi segs) if possible */
	if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
	    (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
		PMD_INIT_LOG(INFO, "Using simple tx code path\n");
		dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
	} else {
		PMD_INIT_LOG(INFO, "Using full-featured tx code path\n");
		PMD_INIT_LOG(INFO, " - txq_flags = %lx [IXGBE_SIMPLE_FLAGS=%lx]\n",
			     (long unsigned)txq->txq_flags,
			     (long unsigned)IXGBE_SIMPLE_FLAGS);
		PMD_INIT_LOG(INFO, " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]\n",
			     (long unsigned)txq->tx_rs_thresh,
			     (long unsigned)RTE_PMD_IXGBE_TX_MAX_BURST);
		dev->tx_pkt_burst = ixgbe_xmit_pkts;
	}

	return (0);
}
static void
ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
{
	unsigned i;

	if (rxq->sw_ring != NULL) {
		for (i = 0; i < rxq->nb_rx_desc; i++) {
			if (rxq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
				rxq->sw_ring[i].mbuf = NULL;
			}
		}
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
		if (rxq->rx_nb_avail) {
			for (i = 0; i < rxq->rx_nb_avail; ++i) {
				struct rte_mbuf *mb;
				mb = rxq->rx_stage[rxq->rx_next_avail + i];
				rte_pktmbuf_free_seg(mb);
			}
			rxq->rx_nb_avail = 0;
		}
#endif
	}
}
static void
ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
{
	if (rxq != NULL) {
		ixgbe_rx_queue_release_mbufs(rxq);
		rte_free(rxq->sw_ring);
		rte_free(rxq);
	}
}

void
ixgbe_dev_rx_queue_release(void *rxq)
{
	ixgbe_rx_queue_release(rxq);
}
/*
 * Check if Rx Burst Bulk Alloc function can be used.
 * Return
 *	0: the preconditions are satisfied and the bulk allocation function
 *	   can be used.
 *	-EINVAL: the preconditions are NOT satisfied and the default Rx burst
 *	   function must be used.
 */
static inline int
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
check_rx_burst_bulk_alloc_preconditions(struct igb_rx_queue *rxq)
#else
check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
#endif
{
	int ret = 0;

	/*
	 * Make sure the following pre-conditions are satisfied:
	 *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
	 *   rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
	 * Scattered packets are not supported. This should be checked
	 * outside of this function.
	 */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST))
		ret = -EINVAL;
	else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc))
		ret = -EINVAL;
	else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0))
		ret = -EINVAL;
	else if (!(rxq->nb_rx_desc <
		   (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)))
		ret = -EINVAL;
#else
	ret = -EINVAL;
#endif

	return ret;
}
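/*
 * Worked example (hypothetical queue geometry): with
 * RTE_PMD_IXGBE_RX_MAX_BURST at its default of 32, a queue created with
 * nb_rx_desc = 128 and rx_free_thresh = 32 passes every precondition
 * above: 32 >= 32, 32 < 128, 128 % 32 == 0, and 128 is well below
 * IXGBE_MAX_RING_DESC - 32. Dropping rx_free_thresh to 16 would fail the
 * first test and force the default Rx burst function.
 */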
/* Reset dynamic igb_rx_queue fields back to defaults */
static void
ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
{
	unsigned i;
	uint16_t len;

	/*
	 * By default, the Rx queue setup function allocates enough memory for
	 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
	 * extra memory at the end of the descriptor ring to be zeroed out. A
	 * pre-condition for using the Rx burst bulk alloc function is that the
	 * number of descriptors is less than or equal to
	 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
	 * constraints here to see if we need to zero out memory after the end
	 * of the H/W descriptor ring.
	 */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
		/* zero out extra memory */
		len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
	else
#endif
		/* do not zero out extra memory */
		len = rxq->nb_rx_desc;

	/*
	 * Zero out HW ring memory. Zero out extra memory at the end of
	 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
	 * reads extra memory as zeros.
	 */
	for (i = 0; i < len * sizeof(union ixgbe_adv_rx_desc); i++) {
		((volatile char *)rxq->rx_ring)[i] = 0;
	}

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	/*
	 * Initialize extra software ring entries. Space for these extra
	 * entries is always allocated.
	 */
	memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
	for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
		rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
	}

	rxq->rx_nb_avail = 0;
	rxq->rx_next_avail = 0;
	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */

	rxq->rx_tail = 0;
	rxq->nb_rx_hold = 0;
	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
}
int
ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_rxconf *rx_conf,
			 struct rte_mempool *mp)
{
	const struct rte_memzone *rz;
	struct igb_rx_queue *rxq;
	struct ixgbe_hw *hw;
	int use_def_burst_func = 1;
	uint16_t len;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of receive descriptors.
	 * It must not exceed hardware maximum, and must be multiple
	 * of IXGBE_ALIGN.
	 */
	if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
	    (nb_desc > IXGBE_MAX_RING_DESC) ||
	    (nb_desc < IXGBE_MIN_RING_DESC)) {
		return (-EINVAL);
	}
	/* Free memory prior to re-allocation if needed... */
	if (dev->data->rx_queues[queue_idx] != NULL)
		ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);

	/* First allocate the rx queue data structure */
	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct igb_rx_queue),
				 CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		return (-ENOMEM);
	rxq->mb_pool = mp;
	rxq->nb_rx_desc = nb_desc;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
				  0 : ETHER_CRC_LEN);
	rxq->drop_en = rx_conf->rx_drop_en;

	/*
	 * Allocate RX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
			IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
			socket_id);
	if (rz == NULL) {
		ixgbe_rx_queue_release(rxq);
		return (-ENOMEM);
	}

	/*
	 * Modified to set up VFRDT for Virtual Function
	 */
	if (hw->mac.type == ixgbe_mac_82599_vf)
		rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
	else
		rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));

	rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
	rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
	/*
	 * Allocate software ring. Allow for space at the end of the
	 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
	 * function does not access an invalid memory region.
	 */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	len = (uint16_t)(nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
#else
	len = nb_desc;
#endif
	rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
					  sizeof(struct igb_rx_entry) * len,
					  CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL) {
		ixgbe_rx_queue_release(rxq);
		return (-ENOMEM);
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
		     rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);

	/*
	 * Certain constraints must be met in order to use the bulk buffer
	 * allocation Rx burst function.
	 */
	use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
	/* Check if pre-conditions are satisfied, and no Scattered Rx */
	if (!use_def_burst_func && !dev->data->scattered_rx) {
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
			     "satisfied. Rx Burst Bulk Alloc function will be "
			     "used on port=%d, queue=%d.\n",
			     rxq->port_id, rxq->queue_id);
		dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
#endif
	} else {
		PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
			     "are not satisfied, Scattered Rx is requested, "
			     "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
			     "enabled (port=%d, queue=%d).\n",
			     rxq->port_id, rxq->queue_id);
	}

	dev->data->rx_queues[queue_idx] = rxq;

	ixgbe_reset_rx_queue(rxq);

	return 0;
}
uint32_t
ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct igb_rx_queue *rxq;
	uint32_t nb_pkts_available;
	uint16_t rx_id;
	uint16_t rx_rdh;

	if (rx_queue_id >= dev->data->nb_rx_queues) {
		PMD_RX_LOG(DEBUG, "Invalid RX queue_id=%d\n", rx_queue_id);
		return 0;
	}

	rxq = dev->data->rx_queues[rx_queue_id];
	rx_id = (uint16_t)((rxq->rx_tail == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rx_tail - 1));
	rx_rdh = IXGBE_PCI_REG(rxq->rdh_reg_addr);
	if (rx_rdh > rx_id)
		nb_pkts_available = rx_rdh - rx_id;
	else
		nb_pkts_available = rx_rdh - rx_id + rxq->nb_rx_desc;

	return (nb_pkts_available);
}
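/*
 * Worked example (hypothetical values): with nb_rx_desc = 128 and
 * rx_tail = 0, rx_id evaluates to 127. If the head register then reads
 * rx_rdh = 5, the wrap-around branch yields 5 - 127 + 128 = 6 packets
 * available on the ring.
 */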
void
ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct igb_tx_queue *txq = dev->data->tx_queues[i];
		if (txq != NULL) {
			ixgbe_tx_queue_release_mbufs(txq);
			ixgbe_reset_tx_queue(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct igb_rx_queue *rxq = dev->data->rx_queues[i];
		if (rxq != NULL) {
			ixgbe_rx_queue_release_mbufs(rxq);
			ixgbe_reset_rx_queue(rxq);
		}
	}
}
/*********************************************************************
 *
 *  Device RX/TX init functions
 *
 **********************************************************************/

/*
 * Receive Side Scaling (RSS)
 * See section 7.1.2.8 in the following document:
 *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
 *
 * Principles:
 * The source and destination IP addresses of the IP header and the source
 * and destination ports of TCP/UDP headers, if any, of received packets are
 * hashed against a configurable random key to compute a 32-bit RSS hash result.
 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
 * RSS output index which is used as the RX queue index where to store the
 * received packets.
 * The following output is supplied in the RX write-back descriptor:
 *     - 32-bit result of the Microsoft RSS hash function,
 *     - 4-bit RSS type field.
 */
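/*
 * Illustrative sketch (not part of the driver): enabling the RSS scheme
 * described above from an application. These are hypothetical values;
 * the fields are the ones ixgbe_rss_configure() below reads back.
 *
 *	struct rte_eth_conf port_conf = {
 *		.rxmode = { .mq_mode = ETH_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,	// NULL falls back to rss_intel_key
 *			.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP,
 *		},
 *	};
 */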
/*
 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
 * Used as the default key.
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
static void
ixgbe_rss_disable(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	uint32_t mrqc;

	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
	mrqc &= ~IXGBE_MRQC_RSSEN;
	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
}
static void
ixgbe_rss_configure(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	uint8_t *hash_key;
	uint32_t rss_key;
	uint32_t mrqc;
	uint32_t reta;
	uint16_t rss_hf;
	uint16_t i;
	uint16_t j;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	if (rss_hf == 0) { /* Disable RSS */
		ixgbe_rss_disable(dev);
		return;
	}
	hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	if (hash_key == NULL)
		hash_key = rss_intel_key; /* Default hash key */

	/* Fill in RSS hash key */
	for (i = 0; i < 10; i++) {
		rss_key  = hash_key[(i * 4)];
		rss_key |= hash_key[(i * 4) + 1] << 8;
		rss_key |= hash_key[(i * 4) + 2] << 16;
		rss_key |= hash_key[(i * 4) + 3] << 24;
		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
	}

	/* Fill in redirection table */
	reta = 0;
	for (i = 0, j = 0; i < 128; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		reta = (reta << 8) | j;
		if ((i & 3) == 3)
			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
	}
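	/*
	 * Worked example (hypothetical setup): with nb_rx_queues = 3, j
	 * cycles 0,1,2,0,... so the first four RETA entries are 0,1,2,0.
	 * They accumulate to 0x00010200, and rte_bswap32() stores this in
	 * RETA(0) as 0x00020100, placing entry 0 in the least significant
	 * byte of the register.
	 */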
	/* Set configured hashing functions in MRQC register */
	mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
	if (rss_hf & ETH_RSS_IPV4)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
	if (rss_hf & ETH_RSS_IPV4_TCP)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
	if (rss_hf & ETH_RSS_IPV6)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
	if (rss_hf & ETH_RSS_IPV6_EX)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
	if (rss_hf & ETH_RSS_IPV6_TCP)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
	if (rss_hf & ETH_RSS_IPV6_TCP_EX)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
	if (rss_hf & ETH_RSS_IPV4_UDP)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
	if (rss_hf & ETH_RSS_IPV6_UDP)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
	if (rss_hf & ETH_RSS_IPV6_UDP_EX)
		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
}
#define NUM_VFTA_REGISTERS 128
#define NIC_RX_BUFFER_SIZE 0x200

static void
ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
{
	struct rte_eth_vmdq_dcb_conf *cfg;
	struct ixgbe_hw *hw;
	enum rte_eth_nb_pools num_pools;
	uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
	uint16_t pbsize;
	uint8_t nb_tcs; /* number of traffic classes */
	int i;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
	num_pools = cfg->nb_queue_pools;
	/* Check we have a valid number of pools */
	if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
		ixgbe_rss_disable(dev);
		return;
	}
	/* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
	nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
	/*
	 * split rx buffer up into sections, each for 1 traffic class
	 */
	pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
	for (i = 0; i < nb_tcs; i++) {
		uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
		rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
		rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
	}
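	/*
	 * Worked example: NIC_RX_BUFFER_SIZE is 0x200, i.e. 512 KB of packet
	 * buffer, and RXPBSIZE is programmed in KB. With 16 pools (8 TCs)
	 * each class gets pbsize = 512 / 8 = 64 KB of receive buffer; with
	 * 32 pools (4 TCs) each class gets 128 KB.
	 */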
	/* zero alloc all unused TCs */
	for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
		rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
	}

	/* MRQC: enable vmdq and dcb */
	mrqc = ((num_pools == ETH_16_POOLS) ?
		IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN);
	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
	/* PFVTCTL: turn on virtualisation and set the default pool */
	vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
	if (cfg->enable_default_pool) {
		vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
	} else {
		vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
	}
	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
	/* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
	queue_mapping = 0;
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
		/*
		 * mapping is done with 3 bits per priority,
		 * so shift by i*3 each time
		 */
		queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));

	IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
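	/*
	 * Worked example (hypothetical mapping): dcb_queue[] = {0,0,1,1,2,2,3,3}
	 * packs one 3-bit field per priority at bit i*3, so queue_mapping =
	 * (1<<6)|(1<<9)|(2<<12)|(2<<15)|(3<<18)|(3<<21) = 0x006D2240, mapping
	 * user priorities 0-7 onto TCs 0,0,1,1,2,2,3,3.
	 */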
	/* RTRPCS: DCB related */
	IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);

	/* VLNCTRL: enable vlan filtering and allow all vlan tags through */
	vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
	vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);

	/* VFTA - enable all vlan filters */
	for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
		IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
	}

	/* VFRE: pool enabling for receive - 16 or 32 */
	IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
			num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);

	/*
	 * MPSAR - allow pools to read specific mac addresses
	 * In this case, all pools should be able to read from mac addr 0
	 */
	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);

	/* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
	for (i = 0; i < cfg->nb_pool_maps; i++) {
		/* set vlan id in VF register and set the valid bit */
		IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
				(cfg->pool_map[i].vlan_id & 0xFFF)));
		/*
		 * Put the allowed pools in VFB reg. As we only have 16 or 32
		 * pools, we only need to use the first half of the register
		 */
		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
	}
}
/**
 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
 * @hw: pointer to hardware structure
 * @dcb_config: pointer to ixgbe_dcb_config structure
 */
static void
ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
		       struct ixgbe_dcb_config *dcb_config)
{
	uint32_t reg;
	uint32_t q;

	PMD_INIT_FUNC_TRACE();
	if (hw->mac.type != ixgbe_mac_82598EB) {
		/* Disable the Tx desc arbiter so that MTQC can be changed */
		reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		reg |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);

		/* Enable DCB for Tx with 8 TCs */
		if (dcb_config->num_tcs.pg_tcs == 8) {
			reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
		} else {
			/* Enable DCB for Tx with 4 TCs */
			reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
		}
		if (dcb_config->vt_mode)
			reg |= IXGBE_MTQC_VT_ENA;
		IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);

		/* Disable drop for all queues */
		for (q = 0; q < 128; q++)
			IXGBE_WRITE_REG(hw, IXGBE_QDE,
				(IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));

		/* Enable the Tx desc arbiter */
		reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		reg &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);

		/* Enable Security TX Buffer IFG for DCB */
		reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
		reg |= IXGBE_SECTX_DCB;
		IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
	}
	return;
}
/**
 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
 * @dev: pointer to rte_eth_dev structure
 * @dcb_config: pointer to ixgbe_dcb_config structure
 */
static void
ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
			    struct ixgbe_dcb_config *dcb_config)
{
	struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
		&dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
	struct ixgbe_hw *hw =
		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	PMD_INIT_FUNC_TRACE();
	if (hw->mac.type != ixgbe_mac_82598EB)
		/* PF VF Transmit Enable */
		IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
			vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ?
			0xFFFF : 0xFFFFFFFF);

	/* Configure general DCB TX parameters */
	ixgbe_dcb_tx_hw_config(hw, dcb_config);
	return;
}
static void
ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
			 struct ixgbe_dcb_config *dcb_config)
{
	struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
		&dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
	struct ixgbe_dcb_tc_config *tc;
	uint8_t i, j;

	/* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
	if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
		dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
		dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
	} else {
		dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
		dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
	}

	/* User Priority to Traffic Class mapping */
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		j = vmdq_rx_conf->dcb_queue[i];
		tc = &dcb_config->tc_config[j];
		tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
			(uint8_t)(1 << j);
	}
}
static void
ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
		       struct ixgbe_dcb_config *dcb_config)
{
	struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
		&dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
	struct ixgbe_dcb_tc_config *tc;
	uint8_t i, j;

	/* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
	if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
		dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
		dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
	} else {
		dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
		dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
	}

	/* User Priority to Traffic Class mapping */
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		j = vmdq_tx_conf->dcb_queue[i];
		tc = &dcb_config->tc_config[j];
		tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
			(uint8_t)(1 << j);
	}
}
static void
ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
		    struct ixgbe_dcb_config *dcb_config)
{
	struct rte_eth_dcb_rx_conf *rx_conf =
		&dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
	struct ixgbe_dcb_tc_config *tc;
	uint8_t i, j;

	dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
	dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;

	/* User Priority to Traffic Class mapping */
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		j = rx_conf->dcb_queue[i];
		tc = &dcb_config->tc_config[j];
		tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
			(uint8_t)(1 << j);
	}
}
static void
ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
		    struct ixgbe_dcb_config *dcb_config)
{
	struct rte_eth_dcb_tx_conf *tx_conf =
		&dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
	struct ixgbe_dcb_tc_config *tc;
	uint8_t i, j;

	dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
	dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;

	/* User Priority to Traffic Class mapping */
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		j = tx_conf->dcb_queue[i];
		tc = &dcb_config->tc_config[j];
		tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
			(uint8_t)(1 << j);
	}
}
/**
 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
 * @hw: pointer to hardware structure
 * @dcb_config: pointer to ixgbe_dcb_config structure
 */
static void
ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
		       struct ixgbe_dcb_config *dcb_config)
{
	uint32_t reg;
	uint32_t vlanctrl;
	uint8_t i;

	PMD_INIT_FUNC_TRACE();
	/*
	 * Disable the arbiter before changing parameters
	 * (always enable recycle mode; WSP)
	 */
	reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
	IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);

	if (hw->mac.type != ixgbe_mac_82598EB) {
		reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
		if (dcb_config->num_tcs.pg_tcs == 4) {
			if (dcb_config->vt_mode)
				reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
					IXGBE_MRQC_VMDQRT4TCEN;
			else {
				IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
				reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
					IXGBE_MRQC_RT4TCEN;
			}
		}
		if (dcb_config->num_tcs.pg_tcs == 8) {
			if (dcb_config->vt_mode)
				reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
					IXGBE_MRQC_VMDQRT8TCEN;
			else {
				IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
				reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
					IXGBE_MRQC_RT8TCEN;
			}
		}

		IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
	}

	/* VLNCTRL: enable vlan filtering and allow all vlan tags through */
	vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
	vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);

	/* VFTA - enable all vlan filters */
	for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
		IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
	}

	/*
	 * Configure Rx packet plane (recycle mode; WSP) and
	 * enable arbiter
	 */
	reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
	IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
	return;
}
static void
ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
			      uint16_t *max, uint8_t *bwg_id, uint8_t *tsa,
			      uint8_t *map)
{
	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
		break;
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
		ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
						  tsa, map);
		break;
	default:
		break;
	}
}

static void
ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill,
			      uint16_t *max, uint8_t *bwg_id, uint8_t *tsa,
			      uint8_t *map)
{
	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max,
						       bwg_id, tsa);
		ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max,
						       bwg_id, tsa);
		break;
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
		ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
						       bwg_id, tsa);
		ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max,
						       bwg_id, tsa, map);
		break;
	default:
		break;
	}
}
#define DCB_RX_CONFIG  1
#define DCB_TX_CONFIG  1
#define DCB_TX_PB      1024
/**
 * ixgbe_dcb_hw_configure - Enable DCB and configure
 * general DCB in VT mode and non-VT mode parameters
 * @dev: pointer to rte_eth_dev structure
 * @dcb_config: pointer to ixgbe_dcb_config structure
 */
static int
ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
		       struct ixgbe_dcb_config *dcb_config)
{
	int ret = 0;
	uint8_t i, pfc_en, nb_tcs;
	uint16_t pbsize;
	uint8_t config_dcb_rx = 0;
	uint8_t config_dcb_tx = 0;
	uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
	uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
	uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
	uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
	uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
	struct ixgbe_dcb_tc_config *tc;
	uint32_t max_frame = dev->data->max_frame_size;
	struct ixgbe_hw *hw =
		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	switch (dev->data->dev_conf.rxmode.mq_mode) {
	case ETH_VMDQ_DCB:
		dcb_config->vt_mode = true;
		if (hw->mac.type != ixgbe_mac_82598EB) {
			config_dcb_rx = DCB_RX_CONFIG;
			/*
			 * get dcb and VT rx configuration parameters
			 * from rte_eth_conf
			 */
			ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
			/* Configure general VMDQ and DCB RX parameters */
			ixgbe_vmdq_dcb_configure(dev);
		}
		break;
	case ETH_DCB_RX:
		dcb_config->vt_mode = false;
		config_dcb_rx = DCB_RX_CONFIG;
		/* Get dcb RX configuration parameters from rte_eth_conf */
		ixgbe_dcb_rx_config(dev, dcb_config);
		/* Configure general DCB RX parameters */
		ixgbe_dcb_rx_hw_config(hw, dcb_config);
		break;
	default:
		PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration\n");
		break;
	}
	switch (dev->data->dev_conf.txmode.mq_mode) {
	case ETH_VMDQ_DCB_TX:
		dcb_config->vt_mode = true;
		config_dcb_tx = DCB_TX_CONFIG;
		/* get DCB and VT TX configuration parameters from rte_eth_conf */
		ixgbe_dcb_vt_tx_config(dev, dcb_config);
		/* Configure general VMDQ and DCB TX parameters */
		ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
		break;
	case ETH_DCB_TX:
		dcb_config->vt_mode = false;
		config_dcb_tx = DCB_TX_CONFIG;
		/* get DCB TX configuration parameters from rte_eth_conf */
		ixgbe_dcb_tx_config(dev, dcb_config);
		/* Configure general DCB TX parameters */
		ixgbe_dcb_tx_hw_config(hw, dcb_config);
		break;
	default:
		PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration\n");
		break;
	}
	nb_tcs = dcb_config->num_tcs.pfc_tcs;
	/* Unpack map */
	ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
	if (nb_tcs == ETH_4_TCS) {
		/* Avoid un-configured priority mapping to TC0 */
		uint8_t j = 4;
		uint8_t mask = 0xFF;
		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
			mask = (uint8_t)(mask & (~(1 << map[i])));
		for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
			if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
				map[j++] = i;
			mask >>= 1;
		}
		/* Re-configure 4 TCs BW */
		for (i = 0; i < nb_tcs; i++) {
			tc = &dcb_config->tc_config[i];
			tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
				(uint8_t)(100 / nb_tcs);
			tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
				(uint8_t)(100 / nb_tcs);
		}
		for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
			tc = &dcb_config->tc_config[i];
			tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
			tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
		}
	}
	if (config_dcb_rx) {
		/* Set RX buffer size */
		pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
		uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
		for (i = 0; i < nb_tcs; i++) {
			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
		}
		/* zero alloc all unused TCs */
		for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
		}
	}
	if (config_dcb_tx) {
		/* Only support an equally distributed Tx packet buffer strategy. */
		uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
		uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
		for (i = 0; i < nb_tcs; i++) {
			IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
			IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
		}
		/* Clear unused TCs, if any, to zero buffer size */
		for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
			IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
			IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
		}
	}
	/* Calculate traffic class credits */
	ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
					   IXGBE_DCB_TX_CONFIG);
	ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
					   IXGBE_DCB_RX_CONFIG);

	if (config_dcb_rx) {
		/* Unpack CEE standard containers */
		ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
		ixgbe_dcb_unpack_max_cee(dcb_config, max);
		ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
		ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
		/* Configure PG(ETS) RX */
		ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
	}

	if (config_dcb_tx) {
		/* Unpack CEE standard containers */
		ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
		ixgbe_dcb_unpack_max_cee(dcb_config, max);
		ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
		ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
		/* Configure PG(ETS) TX */
		ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
	}

	/* Configure queue statistics registers */
	ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
	/* Check if the PFC is supported */
	if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
		pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
		for (i = 0; i < nb_tcs; i++) {
			/*
			 * If the TC count is 8, the default high_water is 48
			 * and the low_water is 16.
			 */
			hw->fc.high_water[i] = (pbsize * 3) / 4;
			hw->fc.low_water[i] = pbsize / 4;
			/* Enable pfc for this TC */
			tc = &dcb_config->tc_config[i];
			tc->pfc = ixgbe_dcb_pfc_enabled;
		}
		ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
		if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
			pfc_en &= 0x0F;
		ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
	}

	return ret;
}
/**
 * ixgbe_configure_dcb - Configure DCB Hardware
 * @dev: pointer to rte_eth_dev
 */
void ixgbe_configure_dcb(struct rte_eth_dev *dev)
{
	struct ixgbe_dcb_config *dcb_cfg =
		IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);

	PMD_INIT_FUNC_TRACE();
	/* Configure DCB hardware */
	if (((dev->data->dev_conf.rxmode.mq_mode != ETH_RSS) &&
	     (dev->data->nb_rx_queues == ETH_DCB_NUM_QUEUES)) ||
	    ((dev->data->dev_conf.txmode.mq_mode != ETH_DCB_NONE) &&
	     (dev->data->nb_tx_queues == ETH_DCB_NUM_QUEUES))) {
		ixgbe_dcb_hw_configure(dev, dcb_cfg);
	}
	return;
}
static int
ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
{
	struct igb_rx_entry *rxe = rxq->sw_ring;
	uint64_t dma_addr;
	unsigned i;

	/* Initialize software ring entries */
	for (i = 0; i < rxq->nb_rx_desc; i++) {
		volatile union ixgbe_adv_rx_desc *rxd;
		struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
		if (mbuf == NULL) {
			PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
				     (unsigned) rxq->queue_id);
			return (-ENOMEM);
		}

		rte_mbuf_refcnt_set(mbuf, 1);
		mbuf->type = RTE_MBUF_PKT;
		mbuf->pkt.next = NULL;
		mbuf->pkt.data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
		mbuf->pkt.nb_segs = 1;
		mbuf->pkt.in_port = rxq->port_id;

		dma_addr =
			rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
		rxd = &rxq->rx_ring[i];
		rxd->read.hdr_addr = dma_addr;
		rxd->read.pkt_addr = dma_addr;
		rxe[i].mbuf = mbuf;
	}

	return 0;
}
/*
 * Initializes Receive Unit.
 */
int
ixgbe_dev_rx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_rx_queue *rxq;
	struct rte_pktmbuf_pool_private *mbp_priv;
	uint64_t bus_addr;
	uint32_t rxctrl;
	uint32_t fctrl;
	uint32_t hlreg0;
	uint32_t maxfrs;
	uint32_t srrctl;
	uint32_t rdrxctl;
	uint32_t rxcsum;
	uint16_t buf_size;
	uint16_t i;
	int ret;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Make sure receives are disabled while setting
	 * up the RX context (registers, descriptor rings, etc.).
	 */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);

	/* Enable receipt of broadcasted frames */
	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
	fctrl |= IXGBE_FCTRL_BAM;
	fctrl |= IXGBE_FCTRL_DPF;
	fctrl |= IXGBE_FCTRL_PMCF;
	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);

	/*
	 * Configure CRC stripping, if any.
	 */
	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	if (dev->data->dev_conf.rxmode.hw_strip_crc)
		hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
	else
		hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;

	/*
	 * Configure jumbo frame support, if any.
	 */
	if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
		hlreg0 |= IXGBE_HLREG0_JUMBOEN;
		maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
		maxfrs &= 0x0000FFFF;
		maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
		IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
	} else
		hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;

	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
	/* Setup RX queues */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings */
		ret = ixgbe_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/*
		 * Reset crc_len in case it was changed after queue setup by a
		 * call to configure.
		 */
		rxq->crc_len = (uint8_t)
			((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
			 ETHER_CRC_LEN);

		/* Setup the Base and Length of the Rx Descriptor Rings */
		bus_addr = rxq->rx_ring_phys_addr;
		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
		/* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
		/*
		 * Configure Header Split
		 */
		if (dev->data->dev_conf.rxmode.header_split) {
			if (hw->mac.type == ixgbe_mac_82599EB) {
				/* Must setup the PSRTYPE register */
				uint32_t psrtype;
				psrtype = IXGBE_PSRTYPE_TCPHDR |
					  IXGBE_PSRTYPE_UDPHDR |
					  IXGBE_PSRTYPE_IPV4HDR |
					  IXGBE_PSRTYPE_IPV6HDR;
				IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
			}
			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
				  IXGBE_SRRCTL_BSIZEHDR_MASK);
			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
		} else
#endif
			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= IXGBE_SRRCTL_DROP_EN;
		/*
		 * Configure the RX buffer size in the BSIZEPACKET field of
		 * the SRRCTL register of the queue.
		 * The value is in 1 KB resolution. Valid values can be from
		 * 1 KB to 16 KB.
		 */
		mbp_priv = (struct rte_pktmbuf_pool_private *)
			((char *)rxq->mb_pool + sizeof(struct rte_mempool));
		buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
				       RTE_PKTMBUF_HEADROOM);
		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
			   IXGBE_SRRCTL_BSIZEPKT_MASK);
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);

		buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
				       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
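		/*
		 * Worked example (hypothetical mempool): with 2048-byte data
		 * rooms and a RTE_PKTMBUF_HEADROOM of 128, buf_size is 1920.
		 * Shifting right by IXGBE_SRRCTL_BSIZEPKT_SHIFT programs
		 * BSIZEPACKET = 1, i.e. a 1 KB hardware buffer; the read-back
		 * above then yields 1024, which is what the dual-VLAN check
		 * below compares against.
		 */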
		/* It adds dual VLAN length for supporting dual VLAN */
		if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
		     2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
			dev->data->scattered_rx = 1;
			dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
		}
	}
	/*
	 * Configure RSS if device configured with multiple RX queues.
	 */
	if (hw->mac.type == ixgbe_mac_82599EB) {
		if (dev->data->nb_rx_queues > 1)
			switch (dev->data->dev_conf.rxmode.mq_mode) {
			case ETH_RSS:
				ixgbe_rss_configure(dev);
				break;
			case ETH_VMDQ_DCB:
				ixgbe_vmdq_dcb_configure(dev);
				break;
			default:
				ixgbe_rss_disable(dev);
			}
		else
			ixgbe_rss_disable(dev);
	}

	/*
	 * Setup the Checksum Register.
	 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
	 * Enable IP/L4 checksum computation by hardware if requested to do so.
	 */
	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
	rxcsum |= IXGBE_RXCSUM_PCSD;
	if (dev->data->dev_conf.rxmode.hw_ip_checksum)
		rxcsum |= IXGBE_RXCSUM_IPPCSE;
	else
		rxcsum &= ~IXGBE_RXCSUM_IPPCSE;

	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);

	if (hw->mac.type == ixgbe_mac_82599EB) {
		rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
		if (dev->data->dev_conf.rxmode.hw_strip_crc)
			rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
		else
			rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
		rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
	}

	return 0;
}
/*
 * Initializes Transmit Unit.
 */
void
ixgbe_dev_tx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_tx_queue *txq;
	uint64_t bus_addr;
	uint32_t hlreg0;
	uint32_t txctrl;
	uint32_t rttdcs;
	uint16_t i;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Enable TX CRC (checksum offload requirement) */
	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	hlreg0 |= IXGBE_HLREG0_TXCRCEN;
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);

	/* Setup the Base and Length of the Tx Descriptor Rings */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];

		bus_addr = txq->tx_ring_phys_addr;
		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
		/* Setup the HW Tx Head and TX Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

		/*
		 * Disable Tx Head Writeback RO bit, since this hoses
		 * bookkeeping if things aren't delivered in order.
		 */
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
			txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
			txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
			break;
		}
	}

	if (hw->mac.type != ixgbe_mac_82598EB) {
		/* disable arbiter before setting MTQC */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);

		/* re-enable arbiter */
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}
}
/*
 * Start Transmit and Receive Units.
 */
void
ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_tx_queue *txq;
	struct igb_rx_queue *rxq;
	uint32_t dmatxctl;
	uint32_t txdctl;
	uint32_t rxdctl;
	uint32_t rxctrl;
	uint16_t i;
	int poll_ms;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		/* Setup Transmit Threshold Registers */
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= txq->pthresh & 0x7F;
		txdctl |= ((txq->hthresh & 0x7F) << 8);
		txdctl |= ((txq->wthresh & 0x7F) << 16);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}
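	/*
	 * Worked example (hypothetical thresholds): pthresh = 32 with
	 * hthresh = wthresh = 0 contributes 0x20 to TXDCTL. The prefetch
	 * threshold occupies bits 6:0, the host threshold bits 14:8 and
	 * the write-back threshold bits 22:16, hence the 0x7F masks above.
	 */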
	if (hw->mac.type != ixgbe_mac_82598EB) {
		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);

		/* Wait until TX Enable ready */
		if (hw->mac.type == ixgbe_mac_82599EB) {
			poll_ms = 10;
			do {
				rte_delay_ms(1);
				txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
			} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
			if (!poll_ms)
				PMD_INIT_LOG(ERR, "Could not enable "
					     "Tx Queue %d\n", i);
		}
	}
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);

		/* Wait until RX Enable ready */
		poll_ms = 10;
		do {
			rte_delay_ms(1);
			rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable "
				     "Rx Queue %d\n", i);
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
	}

	/* Enable Receive engine */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		rxctrl |= IXGBE_RXCTRL_DMBYPS;
	rxctrl |= IXGBE_RXCTRL_RXEN;
	hw->mac.ops.enable_rx_dma(hw, rxctrl);
}
/*
 * [VF] Initializes Receive Unit.
 */
int
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_rx_queue *rxq;
	struct rte_pktmbuf_pool_private *mbp_priv;
	uint64_t bus_addr;
	uint32_t srrctl;
	uint16_t buf_size;
	uint16_t i;
	int ret;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup RX queues */
	dev->rx_pkt_burst = ixgbe_recv_pkts;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings */
		ret = ixgbe_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/* Setup the Base and Length of the Rx Descriptor Rings */
		bus_addr = rxq->rx_ring_phys_addr;

		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

		/* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
		/*
		 * Configure Header Split
		 */
		if (dev->data->dev_conf.rxmode.header_split) {
			/* Must setup the PSRTYPE register */
			uint32_t psrtype;
			psrtype = IXGBE_PSRTYPE_TCPHDR |
				  IXGBE_PSRTYPE_UDPHDR |
				  IXGBE_PSRTYPE_IPV4HDR |
				  IXGBE_PSRTYPE_IPV6HDR;

			IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);

			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
				   IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
				  IXGBE_SRRCTL_BSIZEHDR_MASK);
			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
		} else
#endif
			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= IXGBE_SRRCTL_DROP_EN;

		/*
		 * Configure the RX buffer size in the BSIZEPACKET field of
		 * the SRRCTL register of the queue.
		 * The value is in 1 KB resolution. Valid values can be from
		 * 1 KB to 16 KB.
		 */
		mbp_priv = (struct rte_pktmbuf_pool_private *)
			((char *)rxq->mb_pool + sizeof(struct rte_mempool));
		buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
				       RTE_PKTMBUF_HEADROOM);
		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
			   IXGBE_SRRCTL_BSIZEPKT_MASK);

		/*
		 * VF modification to write virtual function SRRCTL register
		 */
		IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

		buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
				       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

		/* It adds dual VLAN length for supporting dual VLAN */
		if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
		     2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
			dev->data->scattered_rx = 1;
			dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
		}
	}

	return 0;
}
/*
 * [VF] Initializes Transmit Unit.
 */
void
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_tx_queue *txq;
	uint64_t bus_addr;
	uint32_t txctrl;
	uint16_t i;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
		/* Setup the HW Tx Head and TX Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

		/*
		 * Disable Tx Head Writeback RO bit, since this hoses
		 * bookkeeping if things aren't delivered in order.
		 */
		txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i));
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl);
	}
}
/*
 * [VF] Start Transmit and Receive Units.
 */
void
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
	struct ixgbe_hw *hw;
	struct igb_tx_queue *txq;
	struct igb_rx_queue *rxq;
	uint32_t txdctl;
	uint32_t rxdctl;
	uint16_t i;
	int poll_ms;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		/* Setup Transmit Threshold Registers */
		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= txq->pthresh & 0x7F;
		txdctl |= ((txq->hthresh & 0x7F) << 8);
		txdctl |= ((txq->wthresh & 0x7F) << 16);
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

		/* Wait until TX Enable ready */
		poll_ms = 10;
		do {
			rte_delay_ms(1);
			txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable "
				     "Tx Queue %d\n", i);
	}
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

		/* Wait until RX Enable ready */
		poll_ms = 10;
		do {
			rte_delay_ms(1);
			rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable "
				     "Rx Queue %d\n", i);
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
	}
}