/*-
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
35 #include <sys/queue.h>
47 #include <rte_byteorder.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
51 #include <rte_debug.h>
52 #include <rte_interrupts.h>
54 #include <rte_memory.h>
55 #include <rte_memzone.h>
56 #include <rte_launch.h>
57 #include <rte_tailq.h>
59 #include <rte_per_lcore.h>
60 #include <rte_lcore.h>
61 #include <rte_atomic.h>
62 #include <rte_branch_prediction.h>
64 #include <rte_mempool.h>
65 #include <rte_malloc.h>
67 #include <rte_ether.h>
68 #include <rte_ethdev.h>
69 #include <rte_prefetch.h>
73 #include <rte_string_fns.h>
74 #include <rte_errno.h>
76 #include "ixgbe_logs.h"
77 #include "ixgbe/ixgbe_api.h"
78 #include "ixgbe/ixgbe_vf.h"
79 #include "ixgbe_ethdev.h"
80 #include "ixgbe/ixgbe_dcb.h"
83 #define RTE_PMD_IXGBE_TX_MAX_BURST 32
85 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
86 #define RTE_PMD_IXGBE_RX_MAX_BURST 32
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;
	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
	return (m);
}
99 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
100 (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
101 (char *)(mb)->buf_addr))
103 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
104 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
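/*
 * Illustrative example (not used by the driver): for an mbuf whose buffer
 * starts at physical address 0x1000 and whose data pointer sits
 * RTE_PKTMBUF_HEADROOM bytes (128 by default) into the buffer, both macros
 * above resolve to the same DMA address:
 *
 *   uint64_t dma = RTE_MBUF_DATA_DMA_ADDR(mb);          // 0x1000 + 128
 *   uint64_t def = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb);  // 0x1000 + 128
 *
 * They differ only when the application has moved pkt.data away from the
 * default headroom offset (e.g. with rte_pktmbuf_prepend()/rte_pktmbuf_adj()).
 */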
107 * Structure associated with each descriptor of the RX ring of a RX queue.
109 struct igb_rx_entry {
110 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
114 * Structure associated with each descriptor of the TX ring of a TX queue.
116 struct igb_tx_entry {
117 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
118 uint16_t next_id; /**< Index of next descriptor in ring. */
119 uint16_t last_id; /**< Index of last scattered descriptor. */
123 * Structure associated with each RX queue.
125 struct igb_rx_queue {
126 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
127 volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
128 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
129 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
130 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
131 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
132 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
133 uint16_t nb_rx_desc; /**< number of RX descriptors. */
134 uint16_t rx_tail; /**< current value of RDT register. */
135 uint16_t nb_rx_hold; /**< number of held free RX desc. */
136 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
137 uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
138 uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
139 uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
141 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
142 uint16_t queue_id; /**< RX queue index. */
143 uint8_t port_id; /**< Device port identifier. */
144 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
145 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
146 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
147 /** need to alloc dummy mbuf, for wraparound when scanning hw ring */
148 struct rte_mbuf fake_mbuf;
149 /** hold packets to return to application */
150 struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
155 * IXGBE CTX Constants
157 enum ixgbe_advctx_num {
158 IXGBE_CTX_0 = 0, /**< CTX0 */
159 IXGBE_CTX_1 = 1, /**< CTX1 */
160 IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
 * Structure used to check whether a new context descriptor needs to be built.
167 struct ixgbe_advctx_info {
168 uint16_t flags; /**< ol_flags for context build. */
169 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
170 union rte_vlan_macip vlan_macip_lens; /**< vlan, mac ip length. */
174 * Structure associated with each TX queue.
176 struct igb_tx_queue {
177 /** TX ring virtual address. */
178 volatile union ixgbe_adv_tx_desc *tx_ring;
179 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
180 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
181 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
182 uint16_t nb_tx_desc; /**< number of TX descriptors. */
183 uint16_t tx_tail; /**< current value of TDT reg. */
184 uint16_t tx_free_thresh;/**< minimum TX before freeing. */
185 /** Number of TX descriptors to use before RS bit is set. */
186 uint16_t tx_rs_thresh;
187 /** Number of TX descriptors used since RS bit was set. */
189 /** Index to last TX descriptor to have been cleaned. */
190 uint16_t last_desc_cleaned;
191 /** Total number of TX descriptors ready to be allocated. */
193 uint16_t tx_next_dd; /**< next desc to scan for DD bit */
194 uint16_t tx_next_rs; /**< next desc to set RS bit */
195 uint16_t queue_id; /**< TX queue index. */
196 uint8_t port_id; /**< Device port identifier. */
197 uint8_t pthresh; /**< Prefetch threshold register. */
198 uint8_t hthresh; /**< Host threshold register. */
199 uint8_t wthresh; /**< Write-back threshold reg. */
200 uint32_t txq_flags; /**< Holds flags for this TXq */
201 uint32_t ctx_curr; /**< Hardware context states. */
202 /** Hardware context0 history. */
203 struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
208 #define RTE_PMD_USE_PREFETCH
211 #ifdef RTE_PMD_USE_PREFETCH
213 * Prefetch a cache line into all cache levels.
215 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
217 #define rte_ixgbe_prefetch(p) do {} while(0)
220 #ifdef RTE_PMD_PACKET_PREFETCH
221 #define rte_packet_prefetch(p) rte_prefetch1(p)
223 #define rte_packet_prefetch(p) do {} while(0)
226 /*********************************************************************
230 **********************************************************************/
233 * The "simple" TX queue functions require that the following
234 * flags are set when the TX queue is configured:
235 * - ETH_TXQ_FLAGS_NOMULTSEGS
236 * - ETH_TXQ_FLAGS_NOVLANOFFL
237 * - ETH_TXQ_FLAGS_NOXSUMSCTP
238 * - ETH_TXQ_FLAGS_NOXSUMUDP
239 * - ETH_TXQ_FLAGS_NOXSUMTCP
240 * and that the RS bit threshold (tx_rs_thresh) is at least equal to
241 * RTE_PMD_IXGBE_TX_MAX_BURST.
243 #define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
244 ETH_TXQ_FLAGS_NOOFFLOADS)
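/*
 * Illustrative sketch (assumed rte_eth_txconf field names; not part of this
 * driver): a TX queue configuration that satisfies the "simple" path
 * requirements listed above could look like the following:
 *
 *   struct rte_eth_txconf tx_conf = {
 *           .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
 *           .tx_rs_thresh = 32,    // >= RTE_PMD_IXGBE_TX_MAX_BURST
 *           .tx_free_thresh = 32,
 *           .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
 *                        ETH_TXQ_FLAGS_NOOFFLOADS,
 *   };
 *   rte_eth_tx_queue_setup(port_id, queue_id, nb_tx_desc, socket_id,
 *                          &tx_conf);
 *
 * With such settings, ixgbe_dev_tx_queue_setup() below selects
 * ixgbe_xmit_pkts_simple() as the burst function.
 */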
247 * Check for descriptors with their DD bit set and free mbufs.
248 * Return the total number of buffers freed.
250 static inline int __attribute__((always_inline))
251 ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
253 struct igb_tx_entry *txep;
257 /* check DD bit on threshold descriptor */
258 status = txq->tx_ring[txq->tx_next_dd].wb.status;
259 if (! (status & IXGBE_ADVTXD_STAT_DD))
263 * first buffer to free from S/W ring is at index
264 * tx_next_dd - (tx_rs_thresh-1)
266 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
268 /* prefetch the mbufs that are about to be freed */
269 for (i = 0; i < txq->tx_rs_thresh; ++i)
270 rte_prefetch0((txep + i)->mbuf);
272 /* free buffers one at a time */
273 if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
274 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
275 rte_mempool_put(txep->mbuf->pool, txep->mbuf);
279 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
280 rte_pktmbuf_free_seg(txep->mbuf);
285 /* buffers were freed, update counters */
286 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
287 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
288 if (txq->tx_next_dd >= txq->nb_tx_desc)
289 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
291 return txq->tx_rs_thresh;
295 * Populate descriptors with the following info:
296 * 1.) buffer_addr = phys_addr + headroom
297 * 2.) cmd_type_len = DCMD_DTYP_FLAGS | pkt_len
298 * 3.) olinfo_status = pkt_len << PAYLEN_SHIFT
301 /* Defines for Tx descriptor */
302 #define DCMD_DTYP_FLAGS (IXGBE_ADVTXD_DTYP_DATA |\
303 IXGBE_ADVTXD_DCMD_IFCS |\
304 IXGBE_ADVTXD_DCMD_DEXT |\
305 IXGBE_ADVTXD_DCMD_EOP)
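/*
 * Worked example (illustrative only): for a single-segment 60-byte packet,
 * tx1()/tx4() below program the descriptor as:
 *
 *   buffer_addr   = RTE_MBUF_DATA_DMA_ADDR(mbuf);
 *   cmd_type_len  = DCMD_DTYP_FLAGS | 60;
 *       -> advanced data descriptor, insert FCS, EOP, buffer length = 60
 *   olinfo_status = 60 << IXGBE_ADVTXD_PAYLEN_SHIFT;
 *       -> payload length reported to the hardware
 */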
307 /* Populate 4 descriptors with data from 4 mbufs */
309 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
311 uint64_t buf_dma_addr;
315 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
316 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
317 pkt_len = (*pkts)->pkt.data_len;
319 /* write data to descriptor */
320 txdp->read.buffer_addr = buf_dma_addr;
321 txdp->read.cmd_type_len =
322 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
323 txdp->read.olinfo_status =
324 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
328 /* Populate 1 descriptor with data from 1 mbuf */
330 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
332 uint64_t buf_dma_addr;
335 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
336 pkt_len = (*pkts)->pkt.data_len;
338 /* write data to descriptor */
339 txdp->read.buffer_addr = buf_dma_addr;
340 txdp->read.cmd_type_len =
341 ((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
342 txdp->read.olinfo_status =
343 (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
347 * Fill H/W descriptor ring with mbuf data.
348 * Copy mbuf pointers to the S/W ring.
351 ixgbe_tx_fill_hw_ring(struct igb_tx_queue *txq, struct rte_mbuf **pkts,
354 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
355 struct igb_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
356 const int N_PER_LOOP = 4;
357 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
358 int mainpart, leftover;
362 * Process most of the packets in chunks of N pkts. Any
363 * leftover packets will get processed one at a time.
365 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
366 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
367 for (i = 0; i < mainpart; i += N_PER_LOOP) {
368 /* Copy N mbuf pointers to the S/W ring */
369 for (j = 0; j < N_PER_LOOP; ++j) {
370 (txep + i + j)->mbuf = *(pkts + i + j);
372 tx4(txdp + i, pkts + i);
375 if (unlikely(leftover > 0)) {
376 for (i = 0; i < leftover; ++i) {
377 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
378 tx1(txdp + mainpart + i, pkts + mainpart + i);
383 static inline uint16_t
384 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
387 struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
388 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
392 * Begin scanning the H/W ring for done descriptors when the
393 * number of available descriptors drops below tx_free_thresh. For
394 * each done descriptor, free the associated buffer.
396 if (txq->nb_tx_free < txq->tx_free_thresh)
397 ixgbe_tx_free_bufs(txq);
399 /* Only use descriptors that are available */
400 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
401 if (unlikely(nb_pkts == 0))
404 /* Use exactly nb_pkts descriptors */
405 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
408 * At this point, we know there are enough descriptors in the
409 * ring to transmit all the packets. This assumes that each
410 * mbuf contains a single segment, and that no new offloads
411 * are expected, which would require a new context descriptor.
415 * See if we're going to wrap-around. If so, handle the top
416 * of the descriptor ring first, then do the bottom. If not,
417 * the processing looks just like the "bottom" part anyway...
419 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
420 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
421 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
424 * We know that the last descriptor in the ring will need to
425 * have its RS bit set because tx_rs_thresh has to be
426 * a divisor of the ring size
428 tx_r[txq->tx_next_rs].read.cmd_type_len |=
429 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
430 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
435 /* Fill H/W descriptor ring with mbuf data */
436 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
437 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
440 * Determine if RS bit should be set
441 * This is what we actually want:
442 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
443 * but instead of subtracting 1 and doing >=, we can just do
444 * greater than without subtracting.
446 if (txq->tx_tail > txq->tx_next_rs) {
447 tx_r[txq->tx_next_rs].read.cmd_type_len |=
448 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
449 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
451 if (txq->tx_next_rs >= txq->nb_tx_desc)
452 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
456 * Check for wrap-around. This would only happen if we used
457 * up to the last descriptor in the ring, no more, no less.
459 if (txq->tx_tail >= txq->nb_tx_desc)
462 /* update tail pointer */
464 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
470 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
475 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
476 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
477 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
479 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
483 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
484 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
485 nb_tx = (uint16_t)(nb_tx + ret);
486 nb_pkts = (uint16_t)(nb_pkts - ret);
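/*
 * Usage sketch from the application side (hypothetical variable names): the
 * chunking above does not change the burst API contract, so callers simply
 * retry any packets that were not accepted:
 *
 *   uint16_t sent = 0;
 *   while (sent < nb_pkts) {
 *           uint16_t n = rte_eth_tx_burst(port_id, queue_id,
 *                                         &pkts[sent], nb_pkts - sent);
 *           if (n == 0)
 *                   break;  // ring full: retry later or drop
 *           sent += n;
 *   }
 */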
495 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
496 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
497 uint16_t ol_flags, uint32_t vlan_macip_lens)
499 uint32_t type_tucmd_mlhl;
500 uint32_t mss_l4len_idx;
504 ctx_idx = txq->ctx_curr;
508 if (ol_flags & PKT_TX_VLAN_PKT) {
509 cmp_mask |= TX_VLAN_CMP_MASK;
512 if (ol_flags & PKT_TX_IP_CKSUM) {
513 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
514 cmp_mask |= TX_MAC_LEN_CMP_MASK;
517 /* Specify which HW CTX to upload. */
518 mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
519 switch (ol_flags & PKT_TX_L4_MASK) {
520 case PKT_TX_UDP_CKSUM:
521 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
522 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
523 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
524 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
526 case PKT_TX_TCP_CKSUM:
527 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
528 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
529 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
530 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
532 case PKT_TX_SCTP_CKSUM:
533 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
534 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
535 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
536 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
539 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
540 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
544 txq->ctx_cache[ctx_idx].flags = ol_flags;
545 txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
546 txq->ctx_cache[ctx_idx].vlan_macip_lens.data =
547 vlan_macip_lens & cmp_mask;
549 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
550 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
551 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
552 ctx_txd->seqnum_seed = 0;
556 * Check which hardware context can be used. Use the existing match
557 * or create a new context descriptor.
559 static inline uint32_t
560 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
561 uint32_t vlan_macip_lens)
563 /* If match with the current used context */
564 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
565 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
566 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
567 return txq->ctx_curr;
	/* Otherwise, toggle to the other cached context and compare again */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}
	/* Neither cached context matches: a new context descriptor must be built */
579 return (IXGBE_CTX_NUM);
582 static inline uint32_t
583 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
585 static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
586 static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
589 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
590 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
594 static inline uint32_t
595 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
597 static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
598 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
601 /* Default RS bit threshold values */
602 #ifndef DEFAULT_TX_RS_THRESH
603 #define DEFAULT_TX_RS_THRESH 32
605 #ifndef DEFAULT_TX_FREE_THRESH
606 #define DEFAULT_TX_FREE_THRESH 32
609 /* Reset transmit descriptors after they have been used */
611 ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
613 struct igb_tx_entry *sw_ring = txq->sw_ring;
614 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
615 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
616 uint16_t nb_tx_desc = txq->nb_tx_desc;
617 uint16_t desc_to_clean_to;
618 uint16_t nb_tx_to_clean;
620 /* Determine the last descriptor needing to be cleaned */
621 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
622 if (desc_to_clean_to >= nb_tx_desc)
623 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
625 /* Check to make sure the last descriptor to clean is done */
626 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
627 if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD))
		PMD_TX_FREE_LOG(DEBUG,
				"TX descriptor %4u is not done "
				"(port=%d queue=%d)",
				desc_to_clean_to,
				txq->port_id, txq->queue_id);
634 /* Failed to clean any descriptors, better luck next time */
638 /* Figure out how many descriptors will be cleaned */
639 if (last_desc_cleaned > desc_to_clean_to)
640 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
643 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
646 PMD_TX_FREE_LOG(DEBUG,
647 "Cleaning %4u TX descriptors: %4u to %4u "
648 "(port=%d queue=%d)",
649 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
650 txq->port_id, txq->queue_id);
653 * The last descriptor to clean is done, so that means all the
654 * descriptors from the last descriptor that was cleaned
655 * up to the last descriptor with the RS bit set
656 * are done. Only reset the threshold descriptor.
658 txr[desc_to_clean_to].wb.status = 0;
660 /* Update the txq to reflect the last descriptor that was cleaned */
661 txq->last_desc_cleaned = desc_to_clean_to;
662 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
669 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
672 struct igb_tx_queue *txq;
673 struct igb_tx_entry *sw_ring;
674 struct igb_tx_entry *txe, *txn;
675 volatile union ixgbe_adv_tx_desc *txr;
676 volatile union ixgbe_adv_tx_desc *txd;
677 struct rte_mbuf *tx_pkt;
678 struct rte_mbuf *m_seg;
679 uint64_t buf_dma_addr;
680 uint32_t olinfo_status;
681 uint32_t cmd_type_len;
690 uint32_t vlan_macip_lens;
695 sw_ring = txq->sw_ring;
697 tx_id = txq->tx_tail;
698 txe = &sw_ring[tx_id];
700 /* Determine if the descriptor ring needs to be cleaned. */
701 if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
702 ixgbe_xmit_cleanup(txq);
706 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
709 pkt_len = tx_pkt->pkt.pkt_len;
711 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
714 * Determine how many (if any) context descriptors
715 * are needed for offload functionality.
717 ol_flags = tx_pkt->ol_flags;
718 vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
720 /* If hardware offload required */
721 tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
		/* Decide whether a new context descriptor must be built or an existing one can be reused. */
724 ctx = what_advctx_update(txq, tx_ol_req,
726 /* Only allocate context descriptor if required*/
727 new_ctx = (ctx == IXGBE_CTX_NUM);
732 * Keep track of how many descriptors are used this loop
733 * This will always be the number of segments + the number of
734 * Context descriptors required to transmit the packet
736 nb_used = (uint16_t)(tx_pkt->pkt.nb_segs + new_ctx);
739 * The number of descriptors that must be allocated for a
740 * packet is the number of segments of that packet, plus 1
741 * Context Descriptor for the hardware offload, if any.
742 * Determine the last TX descriptor to allocate in the TX ring
743 * for the packet, starting from the current position (tx_id)
746 tx_last = (uint16_t) (tx_id + nb_used - 1);
749 if (tx_last >= txq->nb_tx_desc)
750 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
752 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
753 " tx_first=%u tx_last=%u\n",
754 (unsigned) txq->port_id,
755 (unsigned) txq->queue_id,
761 * Make sure there are enough TX descriptors available to
762 * transmit the entire packet.
763 * nb_used better be less than or equal to txq->tx_rs_thresh
765 if (nb_used > txq->nb_tx_free) {
766 PMD_TX_FREE_LOG(DEBUG,
767 "Not enough free TX descriptors "
768 "nb_used=%4u nb_free=%4u "
769 "(port=%d queue=%d)",
770 nb_used, txq->nb_tx_free,
771 txq->port_id, txq->queue_id);
773 if (ixgbe_xmit_cleanup(txq) != 0) {
774 /* Could not clean any descriptors */
780 /* nb_used better be <= txq->tx_rs_thresh */
781 if (unlikely(nb_used > txq->tx_rs_thresh)) {
782 PMD_TX_FREE_LOG(DEBUG,
783 "The number of descriptors needed to "
784 "transmit the packet exceeds the "
785 "RS bit threshold. This will impact "
787 "nb_used=%4u nb_free=%4u "
789 "(port=%d queue=%d)",
790 nb_used, txq->nb_tx_free,
792 txq->port_id, txq->queue_id);
794 * Loop here until there are enough TX
795 * descriptors or until the ring cannot be
798 while (nb_used > txq->nb_tx_free) {
799 if (ixgbe_xmit_cleanup(txq) != 0) {
801 * Could not clean any
813 * By now there are enough free TX descriptors to transmit
818 * Set common flags of all TX Data Descriptors.
820 * The following bits must be set in all Data Descriptors:
821 * - IXGBE_ADVTXD_DTYP_DATA
822 * - IXGBE_ADVTXD_DCMD_DEXT
824 * The following bits must be set in the first Data Descriptor
825 * and are ignored in the other ones:
826 * - IXGBE_ADVTXD_DCMD_IFCS
827 * - IXGBE_ADVTXD_MAC_1588
828 * - IXGBE_ADVTXD_DCMD_VLE
830 * The following bits must only be set in the last Data
832 * - IXGBE_TXD_CMD_EOP
834 * The following bits can be set in any Data Descriptor, but
835 * are only set in the last Data Descriptor:
838 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
839 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
840 olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
841 #ifdef RTE_LIBRTE_IEEE1588
842 if (ol_flags & PKT_TX_IEEE1588_TMST)
843 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
848 * Setup the TX Advanced Context Descriptor if required
851 volatile struct ixgbe_adv_tx_context_desc *
854 ctx_txd = (volatile struct
855 ixgbe_adv_tx_context_desc *)
858 txn = &sw_ring[txe->next_id];
859 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
861 if (txe->mbuf != NULL) {
862 rte_pktmbuf_free_seg(txe->mbuf);
866 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
869 txe->last_id = tx_last;
870 tx_id = txe->next_id;
		 * Set up the TX Advanced Data Descriptor.
		 * This path is taken whether a new context descriptor
		 * was built or an existing one was reused.
879 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
880 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
881 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
887 txn = &sw_ring[txe->next_id];
889 if (txe->mbuf != NULL)
890 rte_pktmbuf_free_seg(txe->mbuf);
894 * Set up Transmit Data Descriptor.
896 slen = m_seg->pkt.data_len;
897 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
898 txd->read.buffer_addr =
899 rte_cpu_to_le_64(buf_dma_addr);
900 txd->read.cmd_type_len =
901 rte_cpu_to_le_32(cmd_type_len | slen);
902 txd->read.olinfo_status =
903 rte_cpu_to_le_32(olinfo_status);
904 txe->last_id = tx_last;
905 tx_id = txe->next_id;
907 m_seg = m_seg->pkt.next;
908 } while (m_seg != NULL);
911 * The last packet data descriptor needs End Of Packet (EOP)
913 cmd_type_len |= IXGBE_TXD_CMD_EOP;
914 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
915 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917 /* Set RS bit only on threshold packets' last descriptor */
918 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
919 PMD_TX_FREE_LOG(DEBUG,
920 "Setting RS bit on TXD id="
921 "%4u (port=%d queue=%d)",
922 tx_last, txq->port_id, txq->queue_id);
924 cmd_type_len |= IXGBE_TXD_CMD_RS;
926 /* Update txq RS bit counters */
929 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
935 * Set the Transmit Descriptor Tail (TDT)
937 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
938 (unsigned) txq->port_id, (unsigned) txq->queue_id,
939 (unsigned) tx_id, (unsigned) nb_tx);
940 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
941 txq->tx_tail = tx_id;
946 /*********************************************************************
950 **********************************************************************/
951 static inline uint16_t
952 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
956 static uint16_t ip_pkt_types_map[16] = {
957 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
958 PKT_RX_IPV6_HDR, 0, 0, 0,
959 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
960 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
963 static uint16_t ip_rss_types_map[16] = {
964 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
965 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
966 PKT_RX_RSS_HASH, 0, 0, 0,
967 0, 0, 0, PKT_RX_FDIR,
970 #ifdef RTE_LIBRTE_IEEE1588
971 static uint32_t ip_pkt_etqf_map[8] = {
972 0, 0, 0, PKT_RX_IEEE1588_PTP,
976 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
977 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
978 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
980 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
981 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
984 return (uint16_t)(pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
987 static inline uint16_t
988 rx_desc_status_to_pkt_flags(uint32_t rx_status)
	 * Check only whether a VLAN tag is present.
	 * Do not check here whether the L3/L4 RX checksum was verified by the
	 * NIC; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
997 pkt_flags = (uint16_t)((rx_status & IXGBE_RXD_STAT_VP) ?
998 PKT_RX_VLAN_PKT : 0);
1000 #ifdef RTE_LIBRTE_IEEE1588
1001 if (rx_status & IXGBE_RXD_STAT_TMST)
1002 pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
1007 static inline uint16_t
1008 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1011 * Bit 31: IPE, IPv4 checksum error
1012 * Bit 30: L4I, L4I integrity error
1014 static uint16_t error_to_pkt_flags_map[4] = {
1015 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1016 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1018 return error_to_pkt_flags_map[(rx_status >>
1019 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
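/*
 * Worked example (illustrative): if the descriptor reports an IPv4 checksum
 * error (IPE, bit 31) but no L4 error, shifting the status down by
 * IXGBE_RXDADV_ERR_CKSUM_BIT and masking with the 2-bit mask yields index 2,
 * so the packet is flagged with PKT_RX_IP_CKSUM_BAD only; index 3 (both
 * error bits set) yields PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD.
 */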
1022 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
1024 * LOOK_AHEAD defines how many desc statuses to check beyond the
1025 * current descriptor.
1026 * It must be a pound define for optimal performance.
1027 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1028 * function only works with LOOK_AHEAD=8.
1030 #define LOOK_AHEAD 8
1031 #if (LOOK_AHEAD != 8)
1032 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
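/*
 * Note on the scan below (illustrative): after masking each status word with
 * IXGBE_RXDADV_STAT_DD (0x01), every element of s[] is either 0 or 1, so
 *
 *   nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
 *
 * counts how many of the 8 look-ahead descriptors are done. Since the
 * hardware completes descriptors in order, the first nb_dd entries are the
 * ones that can be handed back to the application.
 */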
1035 ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
1037 volatile union ixgbe_adv_rx_desc *rxdp;
1038 struct igb_rx_entry *rxep;
1039 struct rte_mbuf *mb;
1041 int s[LOOK_AHEAD], nb_dd;
1042 int i, j, nb_rx = 0;
1045 /* get references to current descriptor and S/W ring entry */
1046 rxdp = &rxq->rx_ring[rxq->rx_tail];
1047 rxep = &rxq->sw_ring[rxq->rx_tail];
1049 /* check to make sure there is at least 1 packet to receive */
1050 if (! (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
1054 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1055 * reference packets that are ready to be received.
1057 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1058 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD)
1060 /* Read desc statuses backwards to avoid race condition */
1061 for (j = LOOK_AHEAD-1; j >= 0; --j)
1062 s[j] = rxdp[j].wb.upper.status_error;
1064 /* Clear everything but the status bits (LSB) */
1065 for (j = 0; j < LOOK_AHEAD; ++j)
1066 s[j] &= IXGBE_RXDADV_STAT_DD;
1068 /* Compute how many status bits were set */
1069 nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
1072 /* Translate descriptor info to mbuf format */
1073 for (j = 0; j < nb_dd; ++j) {
1075 pkt_len = (uint16_t)(rxdp[j].wb.upper.length -
1077 mb->pkt.data_len = pkt_len;
1078 mb->pkt.pkt_len = pkt_len;
1079 mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan;
1080 mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss;
1082 /* convert descriptor fields to rte mbuf flags */
1083 mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags(
1084 rxdp[j].wb.lower.lo_dword.data);
1085 /* reuse status field from scan list */
1086 mb->ol_flags = (uint16_t)(mb->ol_flags |
1087 rx_desc_status_to_pkt_flags(s[j]));
1088 mb->ol_flags = (uint16_t)(mb->ol_flags |
1089 rx_desc_error_to_pkt_flags(s[j]));
1092 /* Move mbuf pointers from the S/W ring to the stage */
1093 for (j = 0; j < LOOK_AHEAD; ++j) {
1094 rxq->rx_stage[i + j] = rxep[j].mbuf;
1097 /* stop if all requested packets could not be received */
1098 if (nb_dd != LOOK_AHEAD)
1102 /* clear software ring entries so we can cleanup correctly */
1103 for (i = 0; i < nb_rx; ++i) {
1104 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1112 ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
1114 volatile union ixgbe_adv_rx_desc *rxdp;
1115 struct igb_rx_entry *rxep;
1116 struct rte_mbuf *mb;
1121 /* allocate buffers in bulk directly into the S/W ring */
1122 alloc_idx = (uint16_t)(rxq->rx_free_trigger -
1123 (rxq->rx_free_thresh - 1));
1124 rxep = &rxq->sw_ring[alloc_idx];
1125 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1126 rxq->rx_free_thresh);
1127 if (unlikely(diag != 0))
1130 rxdp = &rxq->rx_ring[alloc_idx];
1131 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1132 /* populate the static rte mbuf fields */
1134 rte_mbuf_refcnt_set(mb, 1);
1135 mb->type = RTE_MBUF_PKT;
1136 mb->pkt.next = NULL;
1137 mb->pkt.data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
1138 mb->pkt.nb_segs = 1;
1139 mb->pkt.in_port = rxq->port_id;
1141 /* populate the descriptors */
1142 dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
1143 rxdp[i].read.hdr_addr = dma_addr;
1144 rxdp[i].read.pkt_addr = dma_addr;
1147 /* update tail pointer */
1149 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);
1151 /* update state of internal queue structure */
1152 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
1153 rxq->rx_free_thresh);
1154 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1155 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
1161 static inline uint16_t
1162 ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1165 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1168 /* how many packets are ready to return? */
1169 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1171 /* copy mbuf pointers to the application's packet list */
1172 for (i = 0; i < nb_pkts; ++i)
1173 rx_pkts[i] = stage[i];
1175 /* update internal queue state */
1176 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1177 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1182 static inline uint16_t
1183 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1186 struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
1189 /* Any previously recv'd pkts will be returned from the Rx stage */
1190 if (rxq->rx_nb_avail)
1191 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1193 /* Scan the H/W ring for packets to receive */
1194 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1196 /* update internal queue state */
1197 rxq->rx_next_avail = 0;
1198 rxq->rx_nb_avail = nb_rx;
1199 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1201 /* if required, allocate new buffers to replenish descriptors */
1202 if (rxq->rx_tail > rxq->rx_free_trigger) {
1203 if (ixgbe_rx_alloc_bufs(rxq) != 0) {
1205 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1206 "queue_id=%u\n", (unsigned) rxq->port_id,
1207 (unsigned) rxq->queue_id);
1209 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1210 rxq->rx_free_thresh;
1213 * Need to rewind any previous receives if we cannot
1214 * allocate new buffers to replenish the old ones.
1216 rxq->rx_nb_avail = 0;
1217 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1218 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1219 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1225 if (rxq->rx_tail >= rxq->nb_rx_desc)
1228 /* received any packets this loop? */
1229 if (rxq->rx_nb_avail)
1230 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1235 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1237 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1242 if (unlikely(nb_pkts == 0))
1245 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1246 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1248 /* request is relatively large, chunk it up */
1252 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1253 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1254 nb_rx = (uint16_t)(nb_rx + ret);
1255 nb_pkts = (uint16_t)(nb_pkts - ret);
1262 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
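/*
 * Usage sketch from the application side (hypothetical variable names): the
 * bulk-alloc receive path is selected at queue-setup time, so applications
 * keep using the generic burst API, e.g.:
 *
 *   struct rte_mbuf *pkts[32];
 *   uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *   for (uint16_t i = 0; i < n; i++)
 *           process_packet(pkts[i]);   // application-defined handler
 *
 * Requests larger than RTE_PMD_IXGBE_RX_MAX_BURST are split into chunks by
 * ixgbe_recv_pkts_bulk_alloc() above.
 */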
1265 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1268 struct igb_rx_queue *rxq;
1269 volatile union ixgbe_adv_rx_desc *rx_ring;
1270 volatile union ixgbe_adv_rx_desc *rxdp;
1271 struct igb_rx_entry *sw_ring;
1272 struct igb_rx_entry *rxe;
1273 struct rte_mbuf *rxm;
1274 struct rte_mbuf *nmb;
1275 union ixgbe_adv_rx_desc rxd;
1278 uint32_t hlen_type_rss;
1288 rx_id = rxq->rx_tail;
1289 rx_ring = rxq->rx_ring;
1290 sw_ring = rxq->sw_ring;
1291 while (nb_rx < nb_pkts) {
1293 * The order of operations here is important as the DD status
1294 * bit must not be read after any other descriptor fields.
1295 * rx_ring and rxdp are pointing to volatile data so the order
1296 * of accesses cannot be reordered by the compiler. If they were
1297 * not volatile, they could be reordered which could lead to
1298 * using invalid descriptor fields when read from rxd.
1300 rxdp = &rx_ring[rx_id];
1301 staterr = rxdp->wb.upper.status_error;
1302 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1309 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1310 * is likely to be invalid and to be dropped by the various
1311 * validation checks performed by the network stack.
1313 * Allocate a new mbuf to replenish the RX ring descriptor.
1314 * If the allocation fails:
1315 * - arrange for that RX descriptor to be the first one
1316 * being parsed the next time the receive function is
1317 * invoked [on the same queue].
1319 * - Stop parsing the RX ring and return immediately.
	 * This policy does not drop the packet received in the RX
	 * descriptor for which the allocation of a new mbuf failed.
	 * Thus, it allows that packet to be retrieved later once
	 * mbufs have been freed in the meantime.
1325 * As a side effect, holding RX descriptors instead of
1326 * systematically giving them back to the NIC may lead to
1327 * RX ring exhaustion situations.
1328 * However, the NIC can gracefully prevent such situations
1329 * to happen by sending specific "back-pressure" flow control
1330 * frames to its peer(s).
1332 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1333 "ext_err_stat=0x%08x pkt_len=%u\n",
1334 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1335 (unsigned) rx_id, (unsigned) staterr,
1336 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1338 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1340 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1341 "queue_id=%u\n", (unsigned) rxq->port_id,
1342 (unsigned) rxq->queue_id);
1343 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1348 rxe = &sw_ring[rx_id];
1350 if (rx_id == rxq->nb_rx_desc)
1353 /* Prefetch next mbuf while processing current one. */
1354 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1357 * When next RX descriptor is on a cache-line boundary,
1358 * prefetch the next 4 RX descriptors and the next 8 pointers
1361 if ((rx_id & 0x3) == 0) {
1362 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1363 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1369 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1370 rxdp->read.hdr_addr = dma_addr;
1371 rxdp->read.pkt_addr = dma_addr;
1374 * Initialize the returned mbuf.
1375 * 1) setup generic mbuf fields:
1376 * - number of segments,
1379 * - RX port identifier.
1380 * 2) integrate hardware offload data, if any:
1381 * - RSS flag & hash,
1382 * - IP checksum flag,
1383 * - VLAN TCI, if any,
1386 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1388 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1389 rte_packet_prefetch(rxm->pkt.data);
1390 rxm->pkt.nb_segs = 1;
1391 rxm->pkt.next = NULL;
1392 rxm->pkt.pkt_len = pkt_len;
1393 rxm->pkt.data_len = pkt_len;
1394 rxm->pkt.in_port = rxq->port_id;
1396 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1397 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1398 rxm->pkt.vlan_macip.f.vlan_tci =
1399 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1401 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1402 pkt_flags = (uint16_t)(pkt_flags |
1403 rx_desc_status_to_pkt_flags(staterr));
1404 pkt_flags = (uint16_t)(pkt_flags |
1405 rx_desc_error_to_pkt_flags(staterr));
1406 rxm->ol_flags = pkt_flags;
1408 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1409 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1410 else if (pkt_flags & PKT_RX_FDIR) {
1411 rxm->pkt.hash.fdir.hash =
1412 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1413 & IXGBE_ATR_HASH_MASK);
1414 rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
1417 * Store the mbuf address into the next entry of the array
1418 * of returned packets.
1420 rx_pkts[nb_rx++] = rxm;
1422 rxq->rx_tail = rx_id;
1425 * If the number of free RX descriptors is greater than the RX free
1426 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1428 * Update the RDT with the value of the last processed RX descriptor
1429 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
1431 * hardware point of view...
1433 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1434 if (nb_hold > rxq->rx_free_thresh) {
1435 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1436 "nb_hold=%u nb_rx=%u\n",
1437 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1438 (unsigned) rx_id, (unsigned) nb_hold,
1440 rx_id = (uint16_t) ((rx_id == 0) ?
1441 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1442 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1445 rxq->nb_rx_hold = nb_hold;
1450 ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1453 struct igb_rx_queue *rxq;
1454 volatile union ixgbe_adv_rx_desc *rx_ring;
1455 volatile union ixgbe_adv_rx_desc *rxdp;
1456 struct igb_rx_entry *sw_ring;
1457 struct igb_rx_entry *rxe;
1458 struct rte_mbuf *first_seg;
1459 struct rte_mbuf *last_seg;
1460 struct rte_mbuf *rxm;
1461 struct rte_mbuf *nmb;
1462 union ixgbe_adv_rx_desc rxd;
1463 uint64_t dma; /* Physical address of mbuf data buffer */
1465 uint32_t hlen_type_rss;
1475 rx_id = rxq->rx_tail;
1476 rx_ring = rxq->rx_ring;
1477 sw_ring = rxq->sw_ring;
1480 * Retrieve RX context of current packet, if any.
1482 first_seg = rxq->pkt_first_seg;
1483 last_seg = rxq->pkt_last_seg;
1485 while (nb_rx < nb_pkts) {
1488 * The order of operations here is important as the DD status
1489 * bit must not be read after any other descriptor fields.
1490 * rx_ring and rxdp are pointing to volatile data so the order
1491 * of accesses cannot be reordered by the compiler. If they were
1492 * not volatile, they could be reordered which could lead to
1493 * using invalid descriptor fields when read from rxd.
1495 rxdp = &rx_ring[rx_id];
1496 staterr = rxdp->wb.upper.status_error;
1497 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1504 * Allocate a new mbuf to replenish the RX ring descriptor.
1505 * If the allocation fails:
1506 * - arrange for that RX descriptor to be the first one
1507 * being parsed the next time the receive function is
1508 * invoked [on the same queue].
1510 * - Stop parsing the RX ring and return immediately.
1512 * This policy does not drop the packet received in the RX
1513 * descriptor for which the allocation of a new mbuf failed.
1514 * Thus, it allows that packet to be later retrieved if
	 * mbufs have been freed in the meantime.
1516 * As a side effect, holding RX descriptors instead of
1517 * systematically giving them back to the NIC may lead to
1518 * RX ring exhaustion situations.
1519 * However, the NIC can gracefully prevent such situations
1520 * to happen by sending specific "back-pressure" flow control
1521 * frames to its peer(s).
1523 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
1524 "staterr=0x%x data_len=%u\n",
1525 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1526 (unsigned) rx_id, (unsigned) staterr,
1527 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1529 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
1531 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1532 "queue_id=%u\n", (unsigned) rxq->port_id,
1533 (unsigned) rxq->queue_id);
1534 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1539 rxe = &sw_ring[rx_id];
1541 if (rx_id == rxq->nb_rx_desc)
1544 /* Prefetch next mbuf while processing current one. */
1545 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1548 * When next RX descriptor is on a cache-line boundary,
1549 * prefetch the next 4 RX descriptors and the next 8 pointers
1552 if ((rx_id & 0x3) == 0) {
1553 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1554 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1558 * Update RX descriptor with the physical address of the new
1559 * data buffer of the new allocated mbuf.
1563 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1564 rxdp->read.hdr_addr = dma;
1565 rxdp->read.pkt_addr = dma;
1568 * Set data length & data buffer address of mbuf.
1570 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1571 rxm->pkt.data_len = data_len;
1572 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1575 * If this is the first buffer of the received packet,
1576 * set the pointer to the first mbuf of the packet and
1577 * initialize its context.
1578 * Otherwise, update the total length and the number of segments
1579 * of the current scattered packet, and update the pointer to
1580 * the last mbuf of the current packet.
1582 if (first_seg == NULL) {
1584 first_seg->pkt.pkt_len = data_len;
1585 first_seg->pkt.nb_segs = 1;
1587 first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
1589 first_seg->pkt.nb_segs++;
1590 last_seg->pkt.next = rxm;
1594 * If this is not the last buffer of the received packet,
1595 * update the pointer to the last mbuf of the current scattered
1596 * packet and continue to parse the RX ring.
1598 if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
1604 * This is the last buffer of the received packet.
1605 * If the CRC is not stripped by the hardware:
1606 * - Subtract the CRC length from the total packet length.
1607 * - If the last buffer only contains the whole CRC or a part
1608 * of it, free the mbuf associated to the last buffer.
1609 * If part of the CRC is also contained in the previous
1610 * mbuf, subtract the length of that CRC part from the
1611 * data length of the previous mbuf.
1613 rxm->pkt.next = NULL;
1614 if (unlikely(rxq->crc_len > 0)) {
1615 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
1616 if (data_len <= ETHER_CRC_LEN) {
1617 rte_pktmbuf_free_seg(rxm);
1618 first_seg->pkt.nb_segs--;
1619 last_seg->pkt.data_len = (uint16_t)
1620 (last_seg->pkt.data_len -
1621 (ETHER_CRC_LEN - data_len));
1622 last_seg->pkt.next = NULL;
1625 (uint16_t) (data_len - ETHER_CRC_LEN);
1629 * Initialize the first mbuf of the returned packet:
1630 * - RX port identifier,
1631 * - hardware offload data, if any:
1632 * - RSS flag & hash,
1633 * - IP checksum flag,
1634 * - VLAN TCI, if any,
1637 first_seg->pkt.in_port = rxq->port_id;
1640 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1641 * set in the pkt_flags field.
1643 first_seg->pkt.vlan_macip.f.vlan_tci =
1644 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1645 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1646 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1647 pkt_flags = (uint16_t)(pkt_flags |
1648 rx_desc_status_to_pkt_flags(staterr));
1649 pkt_flags = (uint16_t)(pkt_flags |
1650 rx_desc_error_to_pkt_flags(staterr));
1651 first_seg->ol_flags = pkt_flags;
1653 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1654 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1655 else if (pkt_flags & PKT_RX_FDIR) {
1656 first_seg->pkt.hash.fdir.hash =
1657 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1658 & IXGBE_ATR_HASH_MASK);
1659 first_seg->pkt.hash.fdir.id =
1660 rxd.wb.lower.hi_dword.csum_ip.ip_id;
1663 /* Prefetch data of first segment, if configured to do so. */
1664 rte_packet_prefetch(first_seg->pkt.data);
1667 * Store the mbuf address into the next entry of the array
1668 * of returned packets.
1670 rx_pkts[nb_rx++] = first_seg;
1673 * Setup receipt context for a new packet.
1679 * Record index of the next RX descriptor to probe.
1681 rxq->rx_tail = rx_id;
1684 * Save receive context.
1686 rxq->pkt_first_seg = first_seg;
1687 rxq->pkt_last_seg = last_seg;
1690 * If the number of free RX descriptors is greater than the RX free
1691 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1693 * Update the RDT with the value of the last processed RX descriptor
1694 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
1696 * hardware point of view...
1698 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1699 if (nb_hold > rxq->rx_free_thresh) {
1700 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1701 "nb_hold=%u nb_rx=%u\n",
1702 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1703 (unsigned) rx_id, (unsigned) nb_hold,
1705 rx_id = (uint16_t) ((rx_id == 0) ?
1706 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1707 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1710 rxq->nb_rx_hold = nb_hold;
1714 /*********************************************************************
1716 * Queue management functions
1718 **********************************************************************/
1721 * Rings setup and release.
1723 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
1724 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
1725 * also optimize cache line size effect. H/W supports up to cache line size 128.
1727 #define IXGBE_ALIGN 128
1730 * Maximum number of Ring Descriptors.
1732 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1733 * descriptors should meet the following condition:
1734 * (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1736 #define IXGBE_MIN_RING_DESC 64
1737 #define IXGBE_MAX_RING_DESC 4096
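/*
 * Worked example (illustrative): an advanced RX/TX descriptor is 16 bytes,
 * so the 128-byte RDLEN/TDLEN constraint means the descriptor count must be
 * a multiple of 8. For instance, 512 descriptors * 16 bytes = 8192 bytes,
 * which is a multiple of 128, whereas 100 descriptors (1600 bytes) would be
 * rejected by the queue setup functions below.
 */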
1740 * Create memzone for HW rings. malloc can't be used as the physical address is
1741 * needed. If the memzone is already created, then this function returns a ptr
1744 static const struct rte_memzone *
1745 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1746 uint16_t queue_id, uint32_t ring_size, int socket_id)
1748 char z_name[RTE_MEMZONE_NAMESIZE];
1749 const struct rte_memzone *mz;
1751 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1752 dev->driver->pci_drv.name, ring_name,
1753 dev->data->port_id, queue_id);
1755 mz = rte_memzone_lookup(z_name);
1759 return rte_memzone_reserve_aligned(z_name, ring_size,
1760 socket_id, 0, IXGBE_ALIGN);
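/*
 * For reference (illustrative): with the name format above, port 0 / TX
 * queue 2 of a device bound to this PMD looks up or creates a memzone named
 * "<pci_drv.name>_tx_ring_0_2", so a later call for the same queue returns
 * the existing zone instead of reserving a new one.
 */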
1764 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1768 if (txq->sw_ring != NULL) {
1769 for (i = 0; i < txq->nb_tx_desc; i++) {
1770 if (txq->sw_ring[i].mbuf != NULL) {
1771 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1772 txq->sw_ring[i].mbuf = NULL;
1779 ixgbe_tx_queue_release(struct igb_tx_queue *txq)
1782 ixgbe_tx_queue_release_mbufs(txq);
1783 rte_free(txq->sw_ring);
1789 ixgbe_dev_tx_queue_release(void *txq)
1791 ixgbe_tx_queue_release(txq);
1794 /* (Re)set dynamic igb_tx_queue fields to defaults */
1796 ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
1798 struct igb_tx_entry *txe = txq->sw_ring;
1801 /* Zero out HW ring memory */
1802 for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
1803 ((volatile char *)txq->tx_ring)[i] = 0;
1806 /* Initialize SW ring entries */
1807 prev = (uint16_t) (txq->nb_tx_desc - 1);
1808 for (i = 0; i < txq->nb_tx_desc; i++) {
1809 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1810 txd->wb.status = IXGBE_TXD_STAT_DD;
1813 txe[prev].next_id = i;
1817 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1818 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1821 txq->nb_tx_used = 0;
1823 * Always allow 1 descriptor to be un-allocated to avoid
1824 * a H/W race condition
1826 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1827 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1829 memset((void*)&txq->ctx_cache, 0,
1830 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1834 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1837 unsigned int socket_id,
1838 const struct rte_eth_txconf *tx_conf)
1840 const struct rte_memzone *tz;
1841 struct igb_tx_queue *txq;
1842 struct ixgbe_hw *hw;
1843 uint16_t tx_rs_thresh, tx_free_thresh;
1845 PMD_INIT_FUNC_TRACE();
1846 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1849 * Validate number of transmit descriptors.
1850 * It must not exceed hardware maximum, and must be multiple
1853 if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
1854 (nb_desc > IXGBE_MAX_RING_DESC) ||
1855 (nb_desc < IXGBE_MIN_RING_DESC)) {
1860 * The following two parameters control the setting of the RS bit on
1861 * transmit descriptors.
1862 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1863 * descriptors have been used.
1864 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1865 * descriptors are used or if the number of descriptors required
1866 * to transmit a packet is greater than the number of free TX
1868 * The following constraints must be satisfied:
1869 * tx_rs_thresh must be greater than 0.
1870 * tx_rs_thresh must be less than the size of the ring minus 2.
1871 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1872 * tx_rs_thresh must be a divisor of the ring size.
1873 * tx_free_thresh must be greater than 0.
1874 * tx_free_thresh must be less than the size of the ring minus 3.
1875 * One descriptor in the TX ring is used as a sentinel to avoid a
1876 * H/W race condition, hence the maximum threshold constraints.
 * When either value is set to zero, the default value is used.
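 *
 * Example (illustrative): with nb_desc = 512, tx_rs_thresh = 32 and
 * tx_free_thresh = 32, all of the above constraints hold: 32 > 0,
 * 32 < 510, 32 <= 32, 32 < 509 and 512 % 32 == 0.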
1879 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
1880 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
1881 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
1882 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
1883 if (tx_rs_thresh >= (nb_desc - 2)) {
1884 RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than the number "
1885 "of TX descriptors minus 2. (tx_rs_thresh=%u port=%d "
1886 "queue=%d)\n", (unsigned int)tx_rs_thresh,
1887 (int)dev->data->port_id, (int)queue_idx);
	if (tx_free_thresh >= (nb_desc - 3)) {
		RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
			     "number of TX descriptors minus 3. "
			     "(tx_free_thresh=%u port=%d queue=%d)\n",
			     (unsigned int)tx_free_thresh,
			     (int)dev->data->port_id, (int)queue_idx);
1898 if (tx_rs_thresh > tx_free_thresh) {
1899 RTE_LOG(ERR, PMD, "tx_rs_thresh must be less than or equal to "
1900 "tx_free_thresh. (tx_free_thresh=%u tx_rs_thresh=%u "
1901 "port=%d queue=%d)\n", (unsigned int)tx_free_thresh,
1902 (unsigned int)tx_rs_thresh, (int)dev->data->port_id,
1906 if ((nb_desc % tx_rs_thresh) != 0) {
1907 RTE_LOG(ERR, PMD, "tx_rs_thresh must be a divisor of the "
1908 "number of TX descriptors. (tx_rs_thresh=%u port=%d "
1909 "queue=%d)\n", (unsigned int)tx_rs_thresh,
1910 (int)dev->data->port_id, (int)queue_idx);
	 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
1916 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
1917 * by the NIC and all descriptors are written back after the NIC
1918 * accumulates WTHRESH descriptors.
1920 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
1921 RTE_LOG(ERR, PMD, "TX WTHRESH must be set to 0 if "
1922 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
1923 "port=%d queue=%d)\n", (unsigned int)tx_rs_thresh,
1924 (int)dev->data->port_id, (int)queue_idx);
1928 /* Free memory prior to re-allocation if needed... */
1929 if (dev->data->tx_queues[queue_idx] != NULL)
1930 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
1932 /* First allocate the tx queue data structure */
1933 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct igb_tx_queue),
1934 CACHE_LINE_SIZE, socket_id);
1939 * Allocate TX ring hardware descriptors. A memzone large enough to
1940 * handle the maximum ring size is allocated in order to allow for
1941 * resizing in later calls to the queue setup function.
1943 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1944 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
1947 ixgbe_tx_queue_release(txq);
1951 txq->nb_tx_desc = nb_desc;
1952 txq->tx_rs_thresh = tx_rs_thresh;
1953 txq->tx_free_thresh = tx_free_thresh;
1954 txq->pthresh = tx_conf->tx_thresh.pthresh;
1955 txq->hthresh = tx_conf->tx_thresh.hthresh;
1956 txq->wthresh = tx_conf->tx_thresh.wthresh;
1957 txq->queue_id = queue_idx;
1958 txq->port_id = dev->data->port_id;
1959 txq->txq_flags = tx_conf->txq_flags;
	 * Use VFTDT instead of TDT when the device is a virtual function (VF).
1964 if (hw->mac.type == ixgbe_mac_82599_vf)
1965 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
1967 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));
1969 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1970 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
1972 /* Allocate software ring */
1973 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
1974 sizeof(struct igb_tx_entry) * nb_desc,
1975 CACHE_LINE_SIZE, socket_id);
1976 if (txq->sw_ring == NULL) {
1977 ixgbe_tx_queue_release(txq);
1980 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1981 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1983 ixgbe_reset_tx_queue(txq);
1985 dev->data->tx_queues[queue_idx] = txq;
1987 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1988 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
1989 (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
1990 PMD_INIT_LOG(INFO, "Using simple tx code path\n");
1991 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
1993 PMD_INIT_LOG(INFO, "Using full-featured tx code path\n");
1994 PMD_INIT_LOG(INFO, " - txq_flags = %lx [IXGBE_SIMPLE_FLAGS=%lx]\n", (long unsigned)txq->txq_flags, (long unsigned)IXGBE_SIMPLE_FLAGS);
1995 PMD_INIT_LOG(INFO, " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]\n", (long unsigned)txq->tx_rs_thresh, (long unsigned)RTE_PMD_IXGBE_TX_MAX_BURST);
1996 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2003 ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
2007 if (rxq->sw_ring != NULL) {
2008 for (i = 0; i < rxq->nb_rx_desc; i++) {
2009 if (rxq->sw_ring[i].mbuf != NULL) {
2010 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2011 rxq->sw_ring[i].mbuf = NULL;
2014 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2015 if (rxq->rx_nb_avail) {
2016 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2017 struct rte_mbuf *mb;
2018 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2019 rte_pktmbuf_free_seg(mb);
2021 rxq->rx_nb_avail = 0;
2028 ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
2031 ixgbe_rx_queue_release_mbufs(rxq);
2032 rte_free(rxq->sw_ring);
2038 ixgbe_dev_rx_queue_release(void *rxq)
2040 ixgbe_rx_queue_release(rxq);
2044 * Check if Rx Burst Bulk Alloc function can be used.
2046 * 0: the preconditions are satisfied and the Rx Burst Bulk Alloc function can be used.
2048 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2049 * function must be used.
2052 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2053 check_rx_burst_bulk_alloc_preconditions(struct igb_rx_queue *rxq)
2055 check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
2061 * Make sure the following pre-conditions are satisfied:
2062 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2063 * rxq->rx_free_thresh < rxq->nb_rx_desc
2064 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2065 * rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2066 * Scattered packets are not supported. This should be checked
2067 * outside of this function.
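 *
 * Illustrative example (not in the original comment): nb_rx_desc = 128 with
 * rx_free_thresh = 32 satisfies all of the above, since 32 >=
 * RTE_PMD_IXGBE_RX_MAX_BURST, 32 < 128, 128 % 32 == 0 and
 * 128 < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST).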
2069 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2070 if (! (rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST))
2072 else if (! (rxq->rx_free_thresh < rxq->nb_rx_desc))
2074 else if (! ((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0))
2076 else if (! (rxq->nb_rx_desc <
2077 (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)))
2086 /* Reset dynamic igb_rx_queue fields back to defaults */
2088 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
2094 * By default, the Rx queue setup function allocates enough memory for
2095 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2096 * extra memory at the end of the descriptor ring to be zero'd out. A
2097 * pre-condition for using the Rx burst bulk alloc function is that the
2098 * number of descriptors is less than or equal to
2099 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2100 * constraints here to see if we need to zero out memory after the end
2101 * of the H/W descriptor ring.
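 *
 * Illustrative example (not in the original comment): with nb_rx_desc = 128
 * and the bulk alloc preconditions met, len below becomes
 * 128 + RTE_PMD_IXGBE_RX_MAX_BURST = 160 descriptors.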
2103 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2104 if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
2105 /* zero out extra memory */
2106 len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2109 /* do not zero out extra memory */
2110 len = rxq->nb_rx_desc;
2113 * Zero out HW ring memory. Zero out extra memory at the end of
2114 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2115 * reads extra memory as zeros.
2117 for (i = 0; i < len * sizeof(union ixgbe_adv_rx_desc); i++) {
2118 ((volatile char *)rxq->rx_ring)[i] = 0;
2121 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2123 * initialize extra software ring entries. Space for these extra
2124 * entries is always allocated
2126 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2127 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
2128 rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
2131 rxq->rx_nb_avail = 0;
2132 rxq->rx_next_avail = 0;
2133 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2134 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
2136 rxq->nb_rx_hold = 0;
2137 rxq->pkt_first_seg = NULL;
2138 rxq->pkt_last_seg = NULL;
2142 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2145 unsigned int socket_id,
2146 const struct rte_eth_rxconf *rx_conf,
2147 struct rte_mempool *mp)
2149 const struct rte_memzone *rz;
2150 struct igb_rx_queue *rxq;
2151 struct ixgbe_hw *hw;
2152 int use_def_burst_func = 1;
2155 PMD_INIT_FUNC_TRACE();
2156 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2159 * Validate number of receive descriptors.
2160 * It must not exceed the hardware maximum, and must be a multiple of IXGBE_ALIGN.
2163 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
2164 (nb_desc > IXGBE_MAX_RING_DESC) ||
2165 (nb_desc < IXGBE_MIN_RING_DESC)) {
2169 /* Free memory prior to re-allocation if needed... */
2170 if (dev->data->rx_queues[queue_idx] != NULL)
2171 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2173 /* First allocate the rx queue data structure */
2174 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct igb_rx_queue),
2175 CACHE_LINE_SIZE, socket_id);
2179 rxq->nb_rx_desc = nb_desc;
2180 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2181 rxq->queue_id = queue_idx;
2182 rxq->port_id = dev->data->port_id;
2183 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2185 rxq->drop_en = rx_conf->rx_drop_en;
2188 * Allocate RX ring hardware descriptors. A memzone large enough to
2189 * handle the maximum ring size is allocated in order to allow for
2190 * resizing in later calls to the queue setup function.
2192 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2193 IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
2196 ixgbe_rx_queue_release(rxq);
2200 * Set the tail pointer register address: use VFRDT when a virtual function is detected
2202 if (hw->mac.type == ixgbe_mac_82599_vf)
2203 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2205 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));
2207 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2208 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2211 * Allocate software ring. Allow for space at the end of the
2212 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2213 * function does not access an invalid memory region.
2215 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2216 len = (uint16_t)(nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST);
2220 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2221 sizeof(struct igb_rx_entry) * len,
2222 CACHE_LINE_SIZE, socket_id);
2223 if (rxq->sw_ring == NULL) {
2224 ixgbe_rx_queue_release(rxq);
2227 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
2228 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
2231 * Certain constraints must be met in order to use the bulk buffer
2232 * allocation Rx burst function.
2234 use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
2236 /* Check if pre-conditions are satisfied, and no Scattered Rx */
2237 if (!use_def_burst_func && !dev->data->scattered_rx) {
2238 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2239 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
2240 "satisfied. Rx Burst Bulk Alloc function will be "
2241 "used on port=%d, queue=%d.\n",
2242 rxq->port_id, rxq->queue_id);
2243 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
2246 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
2247 "are not satisfied, Scattered Rx is requested, "
2248 "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
2249 "enabled (port=%d, queue=%d).\n",
2250 rxq->port_id, rxq->queue_id);
2252 dev->data->rx_queues[queue_idx] = rxq;
2254 ixgbe_reset_rx_queue(rxq);
2260 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2264 PMD_INIT_FUNC_TRACE();
2266 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2267 struct igb_tx_queue *txq = dev->data->tx_queues[i];
2269 ixgbe_tx_queue_release_mbufs(txq);
2270 ixgbe_reset_tx_queue(txq);
2274 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2275 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
2277 ixgbe_rx_queue_release_mbufs(rxq);
2278 ixgbe_reset_rx_queue(rxq);
2283 /*********************************************************************
2285 * Device RX/TX init functions
2287 **********************************************************************/
2290 * Receive Side Scaling (RSS)
2291 * See section 7.1.2.8 in the following document:
2292 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2295 * The source and destination IP addresses of the IP header and the source
2296 * and destination ports of TCP/UDP headers, if any, of received packets are
2297 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2298 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2299 * 128-entry redirection table (RETA). Each entry of the RETA provides a 4-bit
2300 * RSS output index which is used as the RX queue index where to store the
2302 * The following output is supplied in the RX write-back descriptor:
2303 * - 32-bit result of the Microsoft RSS hash function,
2304 * - 4-bit RSS type field.
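 *
 * Illustrative example (not part of the original comment): a packet whose
 * 32-bit hash is 0x2A2B2C2D has 7 LSBs 0x2D = 45, so RETA entry 45 supplies
 * the RX queue index for that packet.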
2308 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2309 * Used as the default key.
2311 static uint8_t rss_intel_key[40] = {
2312 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2313 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2314 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2315 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2316 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2320 ixgbe_rss_disable(struct rte_eth_dev *dev)
2322 struct ixgbe_hw *hw;
2325 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2326 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2327 mrqc &= ~IXGBE_MRQC_RSSEN;
2328 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2332 ixgbe_rss_configure(struct rte_eth_dev *dev)
2334 struct ixgbe_hw *hw;
2343 PMD_INIT_FUNC_TRACE();
2344 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2346 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2347 if (rss_hf == 0) { /* Disable RSS */
2348 ixgbe_rss_disable(dev);
2351 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
2352 if (hash_key == NULL)
2353 hash_key = rss_intel_key; /* Default hash key */
2355 /* Fill in RSS hash key */
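/*
 * Illustrative note (not in the original source): each RSSRK register holds
 * four key bytes in little-endian order, e.g. key bytes
 * {0x6D, 0x5A, 0x56, 0xDA} are written as RSSRK(0) = 0xDA565A6D.
 */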
2356 for (i = 0; i < 10; i++) {
2357 rss_key = hash_key[(i * 4)];
2358 rss_key |= hash_key[(i * 4) + 1] << 8;
2359 rss_key |= hash_key[(i * 4) + 2] << 16;
2360 rss_key |= hash_key[(i * 4) + 3] << 24;
2361 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
2364 /* Fill in redirection table */
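/*
 * Illustrative note (not in the original source): the loop below packs four
 * 8-bit entries per 32-bit RETA register; with 4 RX queues the 128 entries
 * repeat the pattern 0,1,2,3, spreading flows round-robin across the queues.
 */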
2366 for (i = 0, j = 0; i < 128; i++, j++) {
2367 if (j == dev->data->nb_rx_queues) j = 0;
2368 reta = (reta << 8) | j;
2370 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
2373 /* Set configured hashing functions in MRQC register */
2374 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
2375 if (rss_hf & ETH_RSS_IPV4)
2376 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2377 if (rss_hf & ETH_RSS_IPV4_TCP)
2378 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2379 if (rss_hf & ETH_RSS_IPV6)
2380 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2381 if (rss_hf & ETH_RSS_IPV6_EX)
2382 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2383 if (rss_hf & ETH_RSS_IPV6_TCP)
2384 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2385 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2386 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2387 if (rss_hf & ETH_RSS_IPV4_UDP)
2388 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2389 if (rss_hf & ETH_RSS_IPV6_UDP)
2390 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2391 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2392 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2393 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2396 #define NUM_VFTA_REGISTERS 128
2397 #define NIC_RX_BUFFER_SIZE 0x200
2400 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2402 struct rte_eth_vmdq_dcb_conf *cfg;
2403 struct ixgbe_hw *hw;
2404 enum rte_eth_nb_pools num_pools;
2405 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2407 uint8_t nb_tcs; /* number of traffic classes */
2410 PMD_INIT_FUNC_TRACE();
2411 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2412 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2413 num_pools = cfg->nb_queue_pools;
2414 /* Check we have a valid number of pools */
2415 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2416 ixgbe_rss_disable(dev);
2419 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2420 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
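/*
 * Illustrative note (assuming ETH_VMDQ_DCB_NUM_QUEUES is 128, the 82599
 * queue count): 128 / 16 pools = 8 TCs per pool, 128 / 32 pools = 4 TCs.
 */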
2424 * split rx buffer up into sections, each for 1 traffic class
2426 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
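/*
 * Illustrative note (not in the original source): with NIC_RX_BUFFER_SIZE of
 * 0x200 (512, in the 1 KB units of RXPBSIZE) and 8 TCs, each TC gets a
 * 64 KB slice of the RX packet buffer.
 */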
2427 for (i = 0 ; i < nb_tcs; i++) {
2428 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2429 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2430 /* clear 10 bits. */
2431 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2432 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2434 /* zero alloc all unused TCs */
2435 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2436 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2437 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
2438 /* clear 10 bits. */
2439 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2442 /* MRQC: enable vmdq and dcb */
2443 mrqc = ((num_pools == ETH_16_POOLS) ? \
2444 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
2445 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2447 /* PFVTCTL: turn on virtualisation and set the default pool */
2448 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2449 if (cfg->enable_default_pool) {
2450 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2452 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2454 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2456 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2458 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2460 * mapping is done with 3 bits per priority,
2461 * so shift by i*3 each time
2463 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2465 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
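/*
 * Illustrative example (not in the original source): mapping user priority 2
 * to TC 5 sets bits 8:6 of RTRUP2TC, i.e. (5 & 0x07) << (2 * 3).
 */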
2467 /* RTRPCS: DCB related */
2468 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2470 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2471 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2472 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2473 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2475 /* VFTA - enable all vlan filters */
2476 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2477 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2480 /* VFRE: pool enabling for receive - 16 or 32 */
2481 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2482 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2485 * MPSAR - allow pools to read specific mac addresses
2486 * In this case, all pools should be able to read from mac addr 0
2488 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2489 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2491 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2492 for (i = 0; i < cfg->nb_pool_maps; i++) {
2493 /* set vlan id in VF register and set the valid bit */
2494 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2495 (cfg->pool_map[i].vlan_id & 0xFFF)));
2497 * Put the allowed pools in VFB reg. As we only have 16 or 32
2498 * pools, we only need to use the first half of the register
2501 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2506 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2507 * @hw: pointer to hardware structure
2508 * @dcb_config: pointer to ixgbe_dcb_config structure
2511 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2512 struct ixgbe_dcb_config *dcb_config)
2517 PMD_INIT_FUNC_TRACE();
2518 if (hw->mac.type != ixgbe_mac_82598EB) {
2519 /* Disable the Tx desc arbiter so that MTQC can be changed */
2520 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2521 reg |= IXGBE_RTTDCS_ARBDIS;
2522 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2524 /* Enable DCB for Tx with 8 TCs */
2525 if (dcb_config->num_tcs.pg_tcs == 8) {
2526 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2529 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2531 if (dcb_config->vt_mode)
2532 reg |= IXGBE_MTQC_VT_ENA;
2533 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2535 /* Disable drop for all queues */
2536 for (q = 0; q < 128; q++)
2537 IXGBE_WRITE_REG(hw, IXGBE_QDE,
2538 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2540 /* Enable the Tx desc arbiter */
2541 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2542 reg &= ~IXGBE_RTTDCS_ARBDIS;
2543 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2545 /* Enable Security TX Buffer IFG for DCB */
2546 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2547 reg |= IXGBE_SECTX_DCB;
2548 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2554 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2555 * @dev: pointer to rte_eth_dev structure
2556 * @dcb_config: pointer to ixgbe_dcb_config structure
2559 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2560 struct ixgbe_dcb_config *dcb_config)
2562 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2563 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2564 struct ixgbe_hw *hw =
2565 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2567 PMD_INIT_FUNC_TRACE();
2568 if (hw->mac.type != ixgbe_mac_82598EB)
2569 /*PF VF Transmit Enable*/
2570 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
2571 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2573 /*Configure general DCB TX parameters*/
2574 ixgbe_dcb_tx_hw_config(hw,dcb_config);
2579 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2580 struct ixgbe_dcb_config *dcb_config)
2582 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
2583 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2584 struct ixgbe_dcb_tc_config *tc;
2587 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2588 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS ) {
2589 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2590 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2593 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2594 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2596 /* User Priority to Traffic Class mapping */
2597 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2598 j = vmdq_rx_conf->dcb_queue[i];
2599 tc = &dcb_config->tc_config[j];
2600 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2606 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
2607 struct ixgbe_dcb_config *dcb_config)
2609 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2610 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2611 struct ixgbe_dcb_tc_config *tc;
2614 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2615 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ) {
2616 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2617 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2620 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2621 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2624 /* User Priority to Traffic Class mapping */
2625 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2626 j = vmdq_tx_conf->dcb_queue[i];
2627 tc = &dcb_config->tc_config[j];
2628 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2635 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
2636 struct ixgbe_dcb_config *dcb_config)
2638 struct rte_eth_dcb_rx_conf *rx_conf =
2639 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
2640 struct ixgbe_dcb_tc_config *tc;
2643 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
2644 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
2646 /* User Priority to Traffic Class mapping */
2647 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2648 j = rx_conf->dcb_queue[i];
2649 tc = &dcb_config->tc_config[j];
2650 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
2656 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
2657 struct ixgbe_dcb_config *dcb_config)
2659 struct rte_eth_dcb_tx_conf *tx_conf =
2660 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
2661 struct ixgbe_dcb_tc_config *tc;
2664 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
2665 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
2667 /* User Priority to Traffic Class mapping */
2668 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2669 j = tx_conf->dcb_queue[i];
2670 tc = &dcb_config->tc_config[j];
2671 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
2677 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
2678 * @hw: pointer to hardware structure
2679 * @dcb_config: pointer to ixgbe_dcb_config structure
2682 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
2683 struct ixgbe_dcb_config *dcb_config)
2689 PMD_INIT_FUNC_TRACE();
2691 * Disable the arbiter before changing parameters
2692 * (always enable recycle mode; WSP)
2694 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
2695 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2697 if (hw->mac.type != ixgbe_mac_82598EB) {
2698 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
2699 if (dcb_config->num_tcs.pg_tcs == 4) {
2700 if (dcb_config->vt_mode)
2701 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2702 IXGBE_MRQC_VMDQRT4TCEN;
2704 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2705 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2709 if (dcb_config->num_tcs.pg_tcs == 8) {
2710 if (dcb_config->vt_mode)
2711 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2712 IXGBE_MRQC_VMDQRT8TCEN;
2714 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2715 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2720 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
2723 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2724 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2725 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2726 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2728 /* VFTA - enable all vlan filters */
2729 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2730 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2734 * Configure Rx packet plane (recycle mode; WSP) and re-enable the arbiter.
2737 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
2738 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2744 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
2745 uint16_t *max,uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2747 switch (hw->mac.type) {
2748 case ixgbe_mac_82598EB:
2749 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
2751 case ixgbe_mac_82599EB:
2752 case ixgbe_mac_X540:
2753 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
2762 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
2763 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2765 switch (hw->mac.type) {
2766 case ixgbe_mac_82598EB:
2767 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id,tsa);
2768 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id,tsa);
2770 case ixgbe_mac_82599EB:
2771 case ixgbe_mac_X540:
2772 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id,tsa);
2773 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,tsa, map);
2780 #define DCB_RX_CONFIG 1
2781 #define DCB_TX_CONFIG 1
2782 #define DCB_TX_PB 1024
2784 * ixgbe_dcb_hw_configure - Enable DCB and configure
2785 * general DCB in VT mode and non-VT mode parameters
2786 * @dev: pointer to rte_eth_dev structure
2787 * @dcb_config: pointer to ixgbe_dcb_config structure
2790 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
2791 struct ixgbe_dcb_config *dcb_config)
2794 uint8_t i,pfc_en,nb_tcs;
2796 uint8_t config_dcb_rx = 0;
2797 uint8_t config_dcb_tx = 0;
2798 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2799 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2800 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2801 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2802 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2803 struct ixgbe_dcb_tc_config *tc;
2804 uint32_t max_frame = dev->data->max_frame_size;
2805 struct ixgbe_hw *hw =
2806 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2808 switch(dev->data->dev_conf.rxmode.mq_mode){
2810 dcb_config->vt_mode = true;
2811 if (hw->mac.type != ixgbe_mac_82598EB) {
2812 config_dcb_rx = DCB_RX_CONFIG;
2814 * Get DCB and VT RX configuration parameters
2817 ixgbe_vmdq_dcb_rx_config(dev,dcb_config);
2818 /*Configure general VMDQ and DCB RX parameters*/
2819 ixgbe_vmdq_dcb_configure(dev);
2823 dcb_config->vt_mode = false;
2824 config_dcb_rx = DCB_RX_CONFIG;
2825 /* Get DCB RX configuration parameters from rte_eth_conf */
2826 ixgbe_dcb_rx_config(dev,dcb_config);
2827 /*Configure general DCB RX parameters*/
2828 ixgbe_dcb_rx_hw_config(hw, dcb_config);
2831 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration\n");
2834 switch (dev->data->dev_conf.txmode.mq_mode) {
2835 case ETH_VMDQ_DCB_TX:
2836 dcb_config->vt_mode = true;
2837 config_dcb_tx = DCB_TX_CONFIG;
2838 /* get DCB and VT TX configuration parameters from rte_eth_conf */
2839 ixgbe_dcb_vt_tx_config(dev,dcb_config);
2840 /*Configure general VMDQ and DCB TX parameters*/
2841 ixgbe_vmdq_dcb_hw_tx_config(dev,dcb_config);
2845 dcb_config->vt_mode = false;
2846 config_dcb_tx = DCB_TX_CONFIG;
2847 /* Get DCB TX configuration parameters from rte_eth_conf */
2848 ixgbe_dcb_tx_config(dev,dcb_config);
2849 /*Configure general DCB TX parameters*/
2850 ixgbe_dcb_tx_hw_config(hw, dcb_config);
2853 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration\n");
2857 nb_tcs = dcb_config->num_tcs.pfc_tcs;
2859 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
2860 if(nb_tcs == ETH_4_TCS) {
2861 /* Avoid un-configured priority mapping to TC0 */
2863 uint8_t mask = 0xFF;
2864 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
2865 mask = (uint8_t)(mask & (~ (1 << map[i])));
2866 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
2867 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
2871 /* Re-configure 4 TCs BW */
2872 for (i = 0; i < nb_tcs; i++) {
2873 tc = &dcb_config->tc_config[i];
2874 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
2875 (uint8_t)(100 / nb_tcs);
2876 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
2877 (uint8_t)(100 / nb_tcs);
2879 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
2880 tc = &dcb_config->tc_config[i];
2881 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
2882 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
2887 /* Set RX buffer size */
2888 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2889 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
2890 for (i = 0 ; i < nb_tcs; i++) {
2891 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2893 /* zero alloc all unused TCs */
2894 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2895 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
2899 /* Only support an equally distributed Tx packet buffer strategy. */
2900 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
2901 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
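/*
 * Illustrative example (assuming IXGBE_TXPBSIZE_MAX is 0x28000 and
 * IXGBE_TXPKT_SIZE_MAX is 0xA, as defined in ixgbe_type.h): with 8 TCs each
 * TC gets a 20 KB TX packet buffer and txpbthresh becomes 20 - 10 = 10.
 */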
2902 for (i = 0; i < nb_tcs; i++) {
2903 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
2904 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
2906 /* Clear unused TCs, if any, to zero buffer size*/
2907 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2908 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
2909 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
2913 /*Calculates traffic class credits*/
2914 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
2915 IXGBE_DCB_TX_CONFIG);
2916 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config,max_frame,
2917 IXGBE_DCB_RX_CONFIG);
2920 /* Unpack CEE standard containers */
2921 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
2922 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2923 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
2924 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
2925 /* Configure PG(ETS) RX */
2926 ixgbe_dcb_hw_arbite_rx_config(hw,refill,max,bwgid,tsa,map);
2930 /* Unpack CEE standard containers */
2931 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
2932 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2933 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
2934 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
2935 /* Configure PG(ETS) TX */
2936 ixgbe_dcb_hw_arbite_tx_config(hw,refill,max,bwgid,tsa,map);
2939 /*Configure queue statistics registers*/
2940 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
2942 /* Check if the PFC is supported */
2943 if(dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
2944 pbsize = (uint16_t) (NIC_RX_BUFFER_SIZE / nb_tcs);
2945 for (i = 0; i < nb_tcs; i++) {
2947 * If the TC count is 8 and the default high_water is 48,
2948 * the low_water defaults to 16.
2950 hw->fc.high_water[i] = (pbsize * 3 ) / 4;
2951 hw->fc.low_water[i] = pbsize / 4;
2952 /* Enable pfc for this TC */
2953 tc = &dcb_config->tc_config[i];
2954 tc->pfc = ixgbe_dcb_pfc_enabled;
2956 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
2957 if(dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
2959 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
2966 * ixgbe_configure_dcb - Configure DCB Hardware
2967 * @dev: pointer to rte_eth_dev
2969 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
2971 struct ixgbe_dcb_config *dcb_cfg =
2972 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
2974 PMD_INIT_FUNC_TRACE();
2975 /** Configure DCB hardware **/
2976 if(((dev->data->dev_conf.rxmode.mq_mode != ETH_RSS) &&
2977 (dev->data->nb_rx_queues == ETH_DCB_NUM_QUEUES))||
2978 ((dev->data->dev_conf.txmode.mq_mode != ETH_DCB_NONE) &&
2979 (dev->data->nb_tx_queues == ETH_DCB_NUM_QUEUES))) {
2980 ixgbe_dcb_hw_configure(dev,dcb_cfg);
2986 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2988 struct igb_rx_entry *rxe = rxq->sw_ring;
2992 /* Initialize software ring entries */
2993 for (i = 0; i < rxq->nb_rx_desc; i++) {
2994 volatile union ixgbe_adv_rx_desc *rxd;
2995 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
2997 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
2998 (unsigned) rxq->queue_id);
3002 rte_mbuf_refcnt_set(mbuf, 1);
3003 mbuf->type = RTE_MBUF_PKT;
3004 mbuf->pkt.next = NULL;
3005 mbuf->pkt.data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
3006 mbuf->pkt.nb_segs = 1;
3007 mbuf->pkt.in_port = rxq->port_id;
3010 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
3011 rxd = &rxq->rx_ring[i];
3012 rxd->read.hdr_addr = dma_addr;
3013 rxd->read.pkt_addr = dma_addr;
3021 * Initializes Receive Unit.
3024 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
3026 struct ixgbe_hw *hw;
3027 struct igb_rx_queue *rxq;
3028 struct rte_pktmbuf_pool_private *mbp_priv;
3041 PMD_INIT_FUNC_TRACE();
3042 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3045 * Make sure receives are disabled while setting
3046 * up the RX context (registers, descriptor rings, etc.).
3048 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3049 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
3051 /* Enable receipt of broadcasted frames */
3052 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
3053 fctrl |= IXGBE_FCTRL_BAM;
3054 fctrl |= IXGBE_FCTRL_DPF;
3055 fctrl |= IXGBE_FCTRL_PMCF;
3056 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
3059 * Configure CRC stripping, if any.
3061 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3062 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3063 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
3065 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
3068 * Configure jumbo frame support, if any.
3070 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
3071 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
3072 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
3073 maxfrs &= 0x0000FFFF;
3074 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
3075 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
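/*
 * Illustrative example (not in the original source): with max_rx_pkt_len of
 * 9000, the MFS field in bits 31:16 of MAXFRS is set to 9000 (0x2328).
 */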
3077 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
3079 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3081 /* Setup RX queues */
3082 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3083 rxq = dev->data->rx_queues[i];
3085 /* Allocate buffers for descriptor rings */
3086 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3091 * Reset crc_len in case it was changed after queue setup by a
3092 * call to configure.
3094 rxq->crc_len = (uint8_t)
3095 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
3098 /* Setup the Base and Length of the Rx Descriptor Rings */
3099 bus_addr = rxq->rx_ring_phys_addr;
3100 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
3101 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3102 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
3103 (uint32_t)(bus_addr >> 32));
3104 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
3105 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3106 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
3107 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
3109 /* Configure the SRRCTL register */
3110 #ifdef RTE_HEADER_SPLIT_ENABLE
3112 * Configure Header Split
3114 if (dev->data->dev_conf.rxmode.header_split) {
3115 if (hw->mac.type == ixgbe_mac_82599EB) {
3116 /* Must setup the PSRTYPE register */
3118 psrtype = IXGBE_PSRTYPE_TCPHDR |
3119 IXGBE_PSRTYPE_UDPHDR |
3120 IXGBE_PSRTYPE_IPV4HDR |
3121 IXGBE_PSRTYPE_IPV6HDR;
3122 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
3124 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3125 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3126 IXGBE_SRRCTL_BSIZEHDR_MASK);
3127 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3130 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3132 /* Set if packets are dropped when no descriptors available */
3134 srrctl |= IXGBE_SRRCTL_DROP_EN;
3137 * Configure the RX buffer size in the BSIZEPACKET field of
3138 * the SRRCTL register of the queue.
3139 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
3142 mbp_priv = (struct rte_pktmbuf_pool_private *)
3143 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3144 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3145 RTE_PKTMBUF_HEADROOM);
3146 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3147 IXGBE_SRRCTL_BSIZEPKT_MASK);
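/*
 * Illustrative example (not in the original source): with a 2 KB mbuf data
 * room and the default 128-byte RTE_PKTMBUF_HEADROOM, buf_size is 1920 and
 * (1920 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) programs a 1 KB receive buffer.
 */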
3148 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3150 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3151 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3152 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
3153 IXGBE_RX_BUF_THRESHOLD > buf_size){
3154 dev->data->scattered_rx = 1;
3155 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
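/*
 * Illustrative note (not in the original source): with the 1 KB
 * per-descriptor buffer from the example above, a 1518-byte max frame cannot
 * fit in a single buffer, so the scattered receive path is selected here.
 */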
3160 * Configure RSS if the device is configured with multiple RX queues.
3162 if (hw->mac.type == ixgbe_mac_82599EB) {
3163 if (dev->data->nb_rx_queues > 1)
3164 switch (dev->data->dev_conf.rxmode.mq_mode) {
3166 ixgbe_rss_configure(dev);
3170 ixgbe_vmdq_dcb_configure(dev);
3173 default: ixgbe_rss_disable(dev);
3176 ixgbe_rss_disable(dev);
3180 * Setup the Checksum Register.
3181 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
3182 * Enable IP/L4 checksum computation by hardware if requested to do so.
3184 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
3185 rxcsum |= IXGBE_RXCSUM_PCSD;
3186 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
3187 rxcsum |= IXGBE_RXCSUM_IPPCSE;
3189 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
3191 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
3193 if (hw->mac.type == ixgbe_mac_82599EB) {
3194 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3195 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3196 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3198 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
3199 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3200 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3207 * Initializes Transmit Unit.
3210 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
3212 struct ixgbe_hw *hw;
3213 struct igb_tx_queue *txq;
3220 PMD_INIT_FUNC_TRACE();
3221 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3223 /* Enable TX CRC (checksum offload requirement) */
3224 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3225 hlreg0 |= IXGBE_HLREG0_TXCRCEN;
3226 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3228 /* Setup the Base and Length of the Tx Descriptor Rings */
3229 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3230 txq = dev->data->tx_queues[i];
3232 bus_addr = txq->tx_ring_phys_addr;
3233 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3234 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3235 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
3236 (uint32_t)(bus_addr >> 32));
3237 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3238 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3239 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3240 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3241 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3244 * Disable Tx Head Writeback RO bit, since this hoses
3245 * bookkeeping if things aren't delivered in order.
3247 switch (hw->mac.type) {
3248 case ixgbe_mac_82598EB:
3249 txctrl = IXGBE_READ_REG(hw,
3250 IXGBE_DCA_TXCTRL(i));
3251 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3252 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i),
3256 case ixgbe_mac_82599EB:
3257 case ixgbe_mac_X540:
3259 txctrl = IXGBE_READ_REG(hw,
3260 IXGBE_DCA_TXCTRL_82599(i));
3261 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3262 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
3268 if (hw->mac.type != ixgbe_mac_82598EB) {
3269 /* disable arbiter before setting MTQC */
3270 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3271 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3272 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3274 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3276 /* re-enable arbiter */
3277 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3278 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3283 * Start Transmit and Receive Units.
3286 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
3288 struct ixgbe_hw *hw;
3289 struct igb_tx_queue *txq;
3290 struct igb_rx_queue *rxq;
3298 PMD_INIT_FUNC_TRACE();
3299 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3301 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3302 txq = dev->data->tx_queues[i];
3303 /* Setup Transmit Threshold Registers */
3304 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3305 txdctl |= txq->pthresh & 0x7F;
3306 txdctl |= ((txq->hthresh & 0x7F) << 8);
3307 txdctl |= ((txq->wthresh & 0x7F) << 16);
3308 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
3311 if (hw->mac.type != ixgbe_mac_82598EB) {
3312 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3313 dmatxctl |= IXGBE_DMATXCTL_TE;
3314 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3317 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3318 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3319 txdctl |= IXGBE_TXDCTL_ENABLE;
3320 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
3322 /* Wait until TX Enable ready */
3323 if (hw->mac.type == ixgbe_mac_82599EB) {
3327 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3328 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3330 PMD_INIT_LOG(ERR, "Could not enable "
3331 "Tx Queue %d\n", i);
3334 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3335 rxq = dev->data->rx_queues[i];
3336 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3337 rxdctl |= IXGBE_RXDCTL_ENABLE;
3338 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
3340 /* Wait until RX Enable ready */
3344 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3345 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3347 PMD_INIT_LOG(ERR, "Could not enable "
3348 "Rx Queue %d\n", i);
3350 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
3353 /* Enable Receive engine */
3354 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3355 if (hw->mac.type == ixgbe_mac_82598EB)
3356 rxctrl |= IXGBE_RXCTRL_DMBYPS;
3357 rxctrl |= IXGBE_RXCTRL_RXEN;
3358 hw->mac.ops.enable_rx_dma(hw, rxctrl);
3363 * [VF] Initializes Receive Unit.
3366 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
3368 struct ixgbe_hw *hw;
3369 struct igb_rx_queue *rxq;
3370 struct rte_pktmbuf_pool_private *mbp_priv;
3377 PMD_INIT_FUNC_TRACE();
3378 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3380 /* Setup RX queues */
3381 dev->rx_pkt_burst = ixgbe_recv_pkts;
3382 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3383 rxq = dev->data->rx_queues[i];
3385 /* Allocate buffers for descriptor rings */
3386 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3390 /* Setup the Base and Length of the Rx Descriptor Rings */
3391 bus_addr = rxq->rx_ring_phys_addr;
3393 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
3394 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3395 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
3396 (uint32_t)(bus_addr >> 32));
3397 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
3398 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3399 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
3400 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
3403 /* Configure the SRRCTL register */
3404 #ifdef RTE_HEADER_SPLIT_ENABLE
3406 * Configure Header Split
3408 if (dev->data->dev_conf.rxmode.header_split) {
3410 /* Must setup the PSRTYPE register */
3412 psrtype = IXGBE_PSRTYPE_TCPHDR |
3413 IXGBE_PSRTYPE_UDPHDR |
3414 IXGBE_PSRTYPE_IPV4HDR |
3415 IXGBE_PSRTYPE_IPV6HDR;
3417 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
3419 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3420 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3421 IXGBE_SRRCTL_BSIZEHDR_MASK);
3422 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3425 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3427 /* Set if packets are dropped when no descriptors available */
3429 srrctl |= IXGBE_SRRCTL_DROP_EN;
3432 * Configure the RX buffer size in the BSIZEPACKET field of
3433 * the SRRCTL register of the queue.
3434 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
3437 mbp_priv = (struct rte_pktmbuf_pool_private *)
3438 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3439 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3440 RTE_PKTMBUF_HEADROOM);
3441 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3442 IXGBE_SRRCTL_BSIZEPKT_MASK);
3445 * VF modification to write virtual function SRRCTL register
3447 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
3449 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3450 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3451 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
3452 dev->data->scattered_rx = 1;
3453 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3461 * [VF] Initializes Transmit Unit.
3464 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
3466 struct ixgbe_hw *hw;
3467 struct igb_tx_queue *txq;
3472 PMD_INIT_FUNC_TRACE();
3473 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3475 /* Setup the Base and Length of the Tx Descriptor Rings */
3476 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3477 txq = dev->data->tx_queues[i];
3478 bus_addr = txq->tx_ring_phys_addr;
3479 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
3480 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3481 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
3482 (uint32_t)(bus_addr >> 32));
3483 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
3484 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3485 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3486 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
3487 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
3490 * Disable Tx Head Writeback RO bit, since this hoses
3491 * bookkeeping if things aren't delivered in order.
3493 txctrl = IXGBE_READ_REG(hw,
3494 IXGBE_VFDCA_TXCTRL(i));
3495 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3496 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
3502 * [VF] Start Transmit and Receive Units.
3505 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
3507 struct ixgbe_hw *hw;
3508 struct igb_tx_queue *txq;
3509 struct igb_rx_queue *rxq;
3515 PMD_INIT_FUNC_TRACE();
3516 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3518 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3519 txq = dev->data->tx_queues[i];
3520 /* Setup Transmit Threshold Registers */
3521 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3522 txdctl |= txq->pthresh & 0x7F;
3523 txdctl |= ((txq->hthresh & 0x7F) << 8);
3524 txdctl |= ((txq->wthresh & 0x7F) << 16);
3525 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3528 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3530 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3531 txdctl |= IXGBE_TXDCTL_ENABLE;
3532 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3535 /* Wait until TX Enable ready */
3538 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3539 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3541 PMD_INIT_LOG(ERR, "Could not enable "
3542 "Tx Queue %d\n", i);
3544 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3546 rxq = dev->data->rx_queues[i];
3548 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3549 rxdctl |= IXGBE_RXDCTL_ENABLE;
3550 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
3552 /* Wait until RX Enable ready */
3556 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3557 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3559 PMD_INIT_LOG(ERR, "Could not enable "
3560 "Rx Queue %d\n", i);
3562 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);