/*-
 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  * Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "ixgbe_logs.h"
#include "ixgbe/ixgbe_api.h"
#include "ixgbe/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "ixgbe/ixgbe_dcb.h"
#define RTE_PMD_IXGBE_TX_MAX_BURST 32

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#endif

static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
	return m;
}

#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
	(char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
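
/*
 * Illustrative sketch (not compiled into the driver): how the two macros
 * above relate. For a freshly allocated mbuf, pkt.data points exactly
 * RTE_PKTMBUF_HEADROOM bytes past buf_addr, so both macros yield the same
 * bus address; once the application moves pkt.data (e.g. with
 * rte_pktmbuf_prepend()), only RTE_MBUF_DATA_DMA_ADDR() tracks the actual
 * start of data. The function name below is hypothetical.
 */
#if 0
static uint64_t
example_dma_addr(struct rte_mbuf *m)
{
	uint64_t cur = RTE_MBUF_DATA_DMA_ADDR(m);          /* follows pkt.data */
	uint64_t def = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(m);  /* fixed headroom */

	(void)def;	/* equal to cur only while pkt.data is unmoved */
	return cur;
}
#endif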
/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
	uint16_t next_id; /**< Index of next descriptor in ring. */
	uint16_t last_id; /**< Index of last scattered descriptor. */
};
/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
	struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
	volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
	uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
	volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
	struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
	struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
	uint16_t nb_rx_desc; /**< number of RX descriptors. */
	uint16_t rx_tail; /**< current value of RDT register. */
	uint16_t nb_rx_hold; /**< number of held free RX desc. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	uint16_t rx_nb_avail; /**< number of staged packets ready to return to app */
	uint16_t rx_next_avail; /**< index of next staged packet to return to app */
	uint16_t rx_free_trigger; /**< triggers RX buffer allocation */
#endif
	uint16_t rx_free_thresh; /**< max free RX desc to hold. */
	uint16_t queue_id; /**< RX queue index. */
	uint8_t port_id; /**< Device port identifier. */
	uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
	uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	/** dummy mbuf needed for wraparound when scanning the HW ring */
	struct rte_mbuf fake_mbuf;
	/** hold packets to return to application */
	struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
#endif
};
/**
 * IXGBE CTX Constants
 */
enum ixgbe_advctx_num {
	IXGBE_CTX_0   = 0, /**< CTX0 */
	IXGBE_CTX_1   = 1, /**< CTX1 */
	IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
};

/**
 * Structure to check whether a new context descriptor needs to be built.
 */
struct ixgbe_advctx_info {
	uint16_t flags;    /**< ol_flags for context build. */
	uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
	union rte_vlan_macip vlan_macip_lens; /**< vlan, mac ip length. */
};
/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
	/** TX ring virtual address. */
	volatile union ixgbe_adv_tx_desc *tx_ring;
	uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
	struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
	volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
	uint16_t nb_tx_desc; /**< number of TX descriptors. */
	uint16_t tx_tail; /**< current value of TDT reg. */
	uint16_t tx_free_thresh;/**< minimum TX before freeing. */
	/** Number of TX descriptors to use before RS bit is set. */
	uint16_t tx_rs_thresh;
	/** Number of TX descriptors used since RS bit was set. */
	uint16_t nb_tx_used;
	/** Index of last TX descriptor that has been cleaned. */
	uint16_t last_desc_cleaned;
	/** Total number of TX descriptors ready to be allocated. */
	uint16_t nb_tx_free;
	uint16_t tx_next_dd; /**< next desc to scan for DD bit */
	uint16_t tx_next_rs; /**< next desc to set RS bit */
	uint16_t queue_id; /**< TX queue index. */
	uint8_t port_id; /**< Device port identifier. */
	uint8_t pthresh; /**< Prefetch threshold register. */
	uint8_t hthresh; /**< Host threshold register. */
	uint8_t wthresh; /**< Write-back threshold reg. */
	uint32_t txq_flags; /**< Holds flags for this TXq */
	uint32_t ctx_curr; /**< Hardware context states. */
	/** Hardware context history (one entry per context). */
	struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
};
#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while (0)
#endif
/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * The "simple" TX queue functions require that the following
 * flags are set when the TX queue is configured:
 *  - ETH_TXQ_FLAGS_NOMULTSEGS
 *  - ETH_TXQ_FLAGS_NOVLANOFFL
 *  - ETH_TXQ_FLAGS_NOXSUMSCTP
 *  - ETH_TXQ_FLAGS_NOXSUMUDP
 *  - ETH_TXQ_FLAGS_NOXSUMTCP
 * and that the RS bit threshold (tx_rs_thresh) is at least equal to
 * RTE_PMD_IXGBE_TX_MAX_BURST.
 */
#define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
			    ETH_TXQ_FLAGS_NOOFFLOADS)
/*
 * Check for descriptors with their DD bit set and free mbufs.
 * Return the total number of buffers freed.
 */
static inline int
ixgbe_tx_free_bufs(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *txep;
	uint32_t status;
	int i;

	/* check DD bit on threshold descriptor */
	status = txq->tx_ring[txq->tx_next_dd].wb.status;
	if (!(status & IXGBE_ADVTXD_STAT_DD))
		return 0;

	/*
	 * first buffer to free from S/W ring is at index
	 * tx_next_dd - (tx_rs_thresh-1)
	 */
	txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

	/* prefetch the mbufs that are about to be freed */
	for (i = 0; i < txq->tx_rs_thresh; ++i)
		rte_prefetch0((txep + i)->mbuf);

	/* free buffers one at a time */
	if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
			rte_mempool_put(txep->mbuf->pool, txep->mbuf);
			txep->mbuf = NULL;
		}
	} else {
		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
			rte_pktmbuf_free_seg(txep->mbuf);
			txep->mbuf = NULL;
		}
	}

	/* buffers were freed, update counters */
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
	if (txq->tx_next_dd >= txq->nb_tx_desc)
		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

	return txq->tx_rs_thresh;
}
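
/*
 * Worked example (sketch, not compiled): with a 128-descriptor ring and
 * tx_rs_thresh = 32, tx_next_dd cycles through 31, 63, 95, 127, 31, ...
 * The "+= tx_rs_thresh" above overshoots to 159 after 127; the wrap
 * check folds it back to tx_rs_thresh - 1. The function name is
 * hypothetical.
 */
#if 0
static uint16_t
example_next_dd_wrap(void)
{
	uint16_t next_dd = 127;
	const uint16_t rs_thresh = 32, nb_desc = 128;

	next_dd = (uint16_t)(next_dd + rs_thresh);	/* 159 */
	if (next_dd >= nb_desc)
		next_dd = (uint16_t)(rs_thresh - 1);	/* wraps back to 31 */
	return next_dd;
}
#endif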
/*
 * Populate descriptors with the following info:
 * 1.) buffer_addr = phys_addr + headroom
 * 2.) cmd_type_len = DCMD_DTYP_FLAGS | pkt_len
 * 3.) olinfo_status = pkt_len << PAYLEN_SHIFT
 */

/* Defines for Tx descriptor */
#define DCMD_DTYP_FLAGS (IXGBE_ADVTXD_DTYP_DATA |\
			 IXGBE_ADVTXD_DCMD_IFCS |\
			 IXGBE_ADVTXD_DCMD_DEXT |\
			 IXGBE_ADVTXD_DCMD_EOP)
/* Populate 4 descriptors with data from 4 mbufs */
static inline void
tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
	uint64_t buf_dma_addr;
	uint32_t pkt_len;
	int i;

	for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
		buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
		pkt_len = (*pkts)->pkt.data_len;

		/* write data to descriptor */
		txdp->read.buffer_addr = buf_dma_addr;
		txdp->read.cmd_type_len =
				((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
		txdp->read.olinfo_status =
				(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
	}
}
/* Populate 1 descriptor with data from 1 mbuf */
static inline void
tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
	uint64_t buf_dma_addr;
	uint32_t pkt_len;

	buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(*pkts);
	pkt_len = (*pkts)->pkt.data_len;

	/* write data to descriptor */
	txdp->read.buffer_addr = buf_dma_addr;
	txdp->read.cmd_type_len =
			((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
	txdp->read.olinfo_status =
			(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
}
/*
 * Fill H/W descriptor ring with mbuf data.
 * Copy mbuf pointers to the S/W ring.
 */
static inline void
ixgbe_tx_fill_hw_ring(struct igb_tx_queue *txq, struct rte_mbuf **pkts,
		      uint16_t nb_pkts)
{
	volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
	struct igb_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
	const int N_PER_LOOP = 4;
	const int N_PER_LOOP_MASK = N_PER_LOOP-1;
	int mainpart, leftover;
	int i, j;

	/*
	 * Process most of the packets in chunks of N pkts.  Any
	 * leftover packets will get processed one at a time.
	 */
	mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
	leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
	for (i = 0; i < mainpart; i += N_PER_LOOP) {
		/* Copy N mbuf pointers to the S/W ring */
		for (j = 0; j < N_PER_LOOP; ++j) {
			(txep + i + j)->mbuf = *(pkts + i + j);
		}
		tx4(txdp + i, pkts + i);
	}

	if (unlikely(leftover > 0)) {
		for (i = 0; i < leftover; ++i) {
			(txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
			tx1(txdp + mainpart + i, pkts + mainpart + i);
		}
	}
}
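
/*
 * Worked example (sketch, not compiled): the mainpart/leftover split
 * above for nb_pkts = 11 and N_PER_LOOP = 4 gives mainpart = 8 (two
 * tx4() calls) and leftover = 3 (three tx1() calls). The function name
 * is hypothetical.
 */
#if 0
static void
example_split(uint16_t nb_pkts)	/* e.g. nb_pkts = 11 */
{
	const int N_PER_LOOP_MASK = 4 - 1;
	int mainpart = nb_pkts & ~N_PER_LOOP_MASK;	/* 11 -> 8 */
	int leftover = nb_pkts &  N_PER_LOOP_MASK;	/* 11 -> 3 */

	(void)mainpart;
	(void)leftover;
}
#endif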
static inline uint16_t
tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	     uint16_t nb_pkts)
{
	struct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;
	volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
	uint16_t n = 0;

	/*
	 * Begin scanning the H/W ring for done descriptors when the
	 * number of available descriptors drops below tx_free_thresh.  For
	 * each done descriptor, free the associated buffer.
	 */
	if (txq->nb_tx_free < txq->tx_free_thresh)
		ixgbe_tx_free_bufs(txq);

	/* Only use descriptors that are available */
	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	/* Use exactly nb_pkts descriptors */
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	/*
	 * At this point, we know there are enough descriptors in the
	 * ring to transmit all the packets.  This assumes that each
	 * mbuf contains a single segment, and that no new offloads
	 * are expected, which would require a new context descriptor.
	 */

	/*
	 * See if we're going to wrap-around. If so, handle the top
	 * of the descriptor ring first, then do the bottom.  If not,
	 * the processing looks just like the "bottom" part anyway...
	 */
	if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
		n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
		ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);

		/*
		 * We know that the last descriptor in the ring will need to
		 * have its RS bit set because tx_rs_thresh has to be
		 * a divisor of the ring size.
		 */
		tx_r[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		txq->tx_tail = 0;
	}

	/* Fill H/W descriptor ring with mbuf data */
	ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
	txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));

	/*
	 * Determine if RS bit should be set.
	 * This is what we actually want:
	 *     if ((txq->tx_tail - 1) >= txq->tx_next_rs)
	 * but instead of subtracting 1 and doing >=, we can just do
	 * greater than without subtracting.
	 */
	if (txq->tx_tail > txq->tx_next_rs) {
		tx_r[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
						txq->tx_rs_thresh);
		if (txq->tx_next_rs >= txq->nb_tx_desc)
			txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
	}

	/*
	 * Check for wrap-around. This would only happen if we used
	 * up to the last descriptor in the ring, no more, no less.
	 */
	if (txq->tx_tail >= txq->nb_tx_desc)
		txq->tx_tail = 0;

	/* update tail pointer */
	rte_wmb();
	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

	return nb_pkts;
}

static uint16_t
ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
		       uint16_t nb_pkts)
{
	uint16_t nb_tx;

	/* Try to transmit at least chunks of TX_MAX_BURST pkts */
	if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
		return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);

	/* transmit more than the max burst, in chunks of TX_MAX_BURST */
	nb_tx = 0;
	while (nb_pkts) {
		uint16_t ret, n;
		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
		ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
		nb_tx = (uint16_t)(nb_tx + ret);
		nb_pkts = (uint16_t)(nb_pkts - ret);
		if (ret < n)
			break;
	}

	return nb_tx;
}
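
/*
 * Hypothetical application-side usage (sketch, not part of the driver):
 * the chunking above is invisible to callers of the standard burst API.
 * The function name and the port/queue numbers are examples only; the
 * loop assumes the ring eventually drains.
 */
#if 0
static void
app_send_all(uint8_t port, uint16_t queue, struct rte_mbuf **pkts,
	     uint16_t n)
{
	uint16_t sent = 0;

	/* retry until the TX ring has accepted every packet */
	while (sent < n)
		sent = (uint16_t)(sent + rte_eth_tx_burst(port, queue,
				&pkts[sent], (uint16_t)(n - sent)));
}
#endif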
static inline void
ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
		volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
		uint16_t ol_flags, uint32_t vlan_macip_lens)
{
	uint32_t type_tucmd_mlhl;
	uint32_t mss_l4len_idx;
	uint32_t ctx_idx;
	uint32_t cmp_mask;

	ctx_idx = txq->ctx_curr;
	cmp_mask = 0;
	type_tucmd_mlhl = 0;

	if (ol_flags & PKT_TX_VLAN_PKT) {
		cmp_mask |= TX_VLAN_CMP_MASK;
	}

	if (ol_flags & PKT_TX_IP_CKSUM) {
		type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
		cmp_mask |= TX_MAC_LEN_CMP_MASK;
	}

	/* Specify which HW CTX to upload. */
	mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
	switch (ol_flags & PKT_TX_L4_MASK) {
	case PKT_TX_UDP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_TCP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_SCTP_CKSUM:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	default:
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
				IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
		break;
	}

	txq->ctx_cache[ctx_idx].flags = ol_flags;
	txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
	txq->ctx_cache[ctx_idx].vlan_macip_lens.data =
		vlan_macip_lens & cmp_mask;

	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
	ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
	ctx_txd->seqnum_seed     = 0;
}
/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
		uint32_t vlan_macip_lens)
{
	/* If match with the current context in use */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* Try to match with the other cached context */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* Mismatch: a new context descriptor must be built */
	return (IXGBE_CTX_NUM);
}
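
/*
 * Worked example (sketch, not compiled): the two-entry context cache.
 * The first use of an offload combination misses (IXGBE_CTX_NUM), the
 * caller then writes a context descriptor via ixgbe_set_xmit_ctx(), and
 * every following packet with the same combination hits the cached
 * slot. The function name and the "lens" argument are hypothetical.
 */
#if 0
static void
example_ctx_cache(struct igb_tx_queue *txq, uint32_t lens)
{
	uint32_t ctx;

	/* first use: miss, returns IXGBE_CTX_NUM */
	ctx = what_advctx_update(txq, PKT_TX_VLAN_PKT, lens);
	/* after ixgbe_set_xmit_ctx() fills the slot, the same flags hit */
	ctx = what_advctx_update(txq, PKT_TX_VLAN_PKT, lens);
	(void)ctx;
}
#endif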
static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
{
	static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
	static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
	uint32_t tmp;

	tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
	return tmp;
}

static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
{
	static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
	return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
}
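
/*
 * Worked example (sketch, not compiled): the tables above turn flag
 * tests into branchless array lookups; each boolean expression
 * evaluates to 0 or 1 and indexes the table directly. The function
 * name is hypothetical.
 */
#if 0
static void
example_branchless_flags(void)
{
	uint16_t ol_flags = (uint16_t)(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM);

	/* both index expressions evaluate to 1, so olinfo is
	 * IXGBE_ADVTXD_POPTS_IXSM | IXGBE_ADVTXD_POPTS_TXSM */
	uint32_t olinfo = tx_desc_cksum_flags_to_olinfo(ol_flags);
	(void)olinfo;
}
#endif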
/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
/* Reset transmit descriptors after they have been used */
static inline int
ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *sw_ring = txq->sw_ring;
	volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
	uint16_t nb_tx_desc = txq->nb_tx_desc;
	uint16_t desc_to_clean_to;
	uint16_t nb_tx_to_clean;

	/* Determine the last descriptor needing to be cleaned */
	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
	if (desc_to_clean_to >= nb_tx_desc)
		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

	/* Check to make sure the last descriptor to clean is done */
	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
	if (!(txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD)) {
		PMD_TX_FREE_LOG(DEBUG,
				"TX descriptor %4u is not done "
				"(port=%d queue=%d)",
				desc_to_clean_to,
				txq->port_id, txq->queue_id);
		/* Failed to clean any descriptors, better luck next time */
		return -(1);
	}

	/* Figure out how many descriptors will be cleaned */
	if (last_desc_cleaned > desc_to_clean_to)
		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
							desc_to_clean_to);
	else
		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
							last_desc_cleaned);

	PMD_TX_FREE_LOG(DEBUG,
			"Cleaning %4u TX descriptors: %4u to %4u "
			"(port=%d queue=%d)",
			nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
			txq->port_id, txq->queue_id);

	/*
	 * The last descriptor to clean is done, so that means all the
	 * descriptors from the last descriptor that was cleaned
	 * up to the last descriptor with the RS bit set
	 * are done. Only reset the threshold descriptor.
	 */
	txr[desc_to_clean_to].wb.status = 0;

	/* Update the txq to reflect the last descriptor that was cleaned */
	txq->last_desc_cleaned = desc_to_clean_to;
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);

	/* No Error */
	return 0;
}
uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct igb_tx_queue *txq;
	struct igb_tx_entry *sw_ring;
	struct igb_tx_entry *txe, *txn;
	volatile union ixgbe_adv_tx_desc *txr;
	volatile union ixgbe_adv_tx_desc *txd;
	struct rte_mbuf *tx_pkt;
	struct rte_mbuf *m_seg;
	uint64_t buf_dma_addr;
	uint32_t olinfo_status;
	uint32_t cmd_type_len;
	uint32_t pkt_len;
	uint16_t slen;
	uint16_t ol_flags;
	uint16_t tx_id;
	uint16_t tx_last;
	uint16_t nb_tx;
	uint16_t nb_used;
	uint16_t tx_ol_req;
	uint32_t vlan_macip_lens;
	uint32_t ctx = 0;
	uint32_t new_ctx;

	txq = tx_queue;
	sw_ring = txq->sw_ring;
	txr = txq->tx_ring;
	tx_id = txq->tx_tail;
	txe = &sw_ring[tx_id];

	/* Determine if the descriptor ring needs to be cleaned. */
	if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
		ixgbe_xmit_cleanup(txq);
	}

	/* TX loop */
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		new_ctx = 0;
		tx_pkt = *tx_pkts++;
		pkt_len = tx_pkt->pkt.pkt_len;

		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

		/*
		 * Determine how many (if any) context descriptors
		 * are needed for offload functionality.
		 */
		ol_flags = tx_pkt->ol_flags;
		vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;

		/* If hardware offload required */
		tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
		if (tx_ol_req) {
			/*
			 * Check whether a new context descriptor is required,
			 * or whether an existing one can be reused.
			 */
			ctx = what_advctx_update(txq, tx_ol_req,
				vlan_macip_lens);
			/* Only allocate context descriptor if required */
			new_ctx = (ctx == IXGBE_CTX_NUM);
			ctx = txq->ctx_curr;
		}

		/*
		 * Keep track of how many descriptors are used this loop.
		 * This will always be the number of segments + the number of
		 * Context descriptors required to transmit the packet.
		 */
		nb_used = (uint16_t)(tx_pkt->pkt.nb_segs + new_ctx);

		/*
		 * The number of descriptors that must be allocated for a
		 * packet is the number of segments of that packet, plus 1
		 * Context Descriptor for the hardware offload, if any.
		 * Determine the last TX descriptor to allocate in the TX ring
		 * for the packet, starting from the current position (tx_id)
		 * in the ring.
		 */
		tx_last = (uint16_t) (tx_id + nb_used - 1);

		/* Circular ring */
		if (tx_last >= txq->nb_tx_desc)
			tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
			   " tx_first=%u tx_last=%u\n",
			   (unsigned) txq->port_id,
			   (unsigned) txq->queue_id,
			   (unsigned) pkt_len,
			   (unsigned) tx_id,
			   (unsigned) tx_last);

		/*
		 * Make sure there are enough TX descriptors available to
		 * transmit the entire packet.
		 * nb_used better be less than or equal to txq->tx_rs_thresh
		 */
		if (nb_used > txq->nb_tx_free) {
			PMD_TX_FREE_LOG(DEBUG,
					"Not enough free TX descriptors "
					"nb_used=%4u nb_free=%4u "
					"(port=%d queue=%d)",
					nb_used, txq->nb_tx_free,
					txq->port_id, txq->queue_id);

			if (ixgbe_xmit_cleanup(txq) != 0) {
				/* Could not clean any descriptors */
				if (nb_tx == 0)
					return 0;
				goto end_of_tx;
			}

			/* nb_used better be <= txq->tx_rs_thresh */
			if (unlikely(nb_used > txq->tx_rs_thresh)) {
				PMD_TX_FREE_LOG(DEBUG,
					"The number of descriptors needed to "
					"transmit the packet exceeds the "
					"RS bit threshold. This will impact "
					"performance. "
					"nb_used=%4u nb_free=%4u "
					"tx_rs_thresh=%4u. "
					"(port=%d queue=%d)",
					nb_used, txq->nb_tx_free,
					txq->tx_rs_thresh,
					txq->port_id, txq->queue_id);
				/*
				 * Loop here until there are enough TX
				 * descriptors or until the ring cannot be
				 * cleaned.
				 */
				while (nb_used > txq->nb_tx_free) {
					if (ixgbe_xmit_cleanup(txq) != 0) {
						/*
						 * Could not clean any
						 * descriptors
						 */
						if (nb_tx == 0)
							return 0;
						goto end_of_tx;
					}
				}
			}
		}

		/*
		 * By now there are enough free TX descriptors to transmit
		 * the packet.
		 */

		/*
		 * Set common flags of all TX Data Descriptors.
		 *
		 * The following bits must be set in all Data Descriptors:
		 *   - IXGBE_ADVTXD_DTYP_DATA
		 *   - IXGBE_ADVTXD_DCMD_DEXT
		 *
		 * The following bits must be set in the first Data Descriptor
		 * and are ignored in the other ones:
		 *   - IXGBE_ADVTXD_DCMD_IFCS
		 *   - IXGBE_ADVTXD_MAC_1588
		 *   - IXGBE_ADVTXD_DCMD_VLE
		 *
		 * The following bits must only be set in the last Data
		 * Descriptor:
		 *   - IXGBE_TXD_CMD_EOP
		 *
		 * The following bits can be set in any Data Descriptor, but
		 * are only set in the last Data Descriptor:
		 *   - IXGBE_TXD_CMD_RS
		 */
		cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
			IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
		olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
#ifdef RTE_LIBRTE_IEEE1588
		if (ol_flags & PKT_TX_IEEE1588_TMST)
			cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif

		if (tx_ol_req) {
			/*
			 * Setup the TX Advanced Context Descriptor if required
			 */
			if (new_ctx) {
				volatile struct ixgbe_adv_tx_context_desc *
				    ctx_txd;

				ctx_txd = (volatile struct
				    ixgbe_adv_tx_context_desc *)
				    &txr[tx_id];

				txn = &sw_ring[txe->next_id];
				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);

				if (txe->mbuf != NULL) {
					rte_pktmbuf_free_seg(txe->mbuf);
					txe->mbuf = NULL;
				}

				ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
				    vlan_macip_lens);

				txe->last_id = tx_last;
				tx_id = txe->next_id;
				txe = txn;
			}

			/*
			 * Setup the TX Advanced Data Descriptor.
			 * This path runs whether a context descriptor was
			 * newly built or an existing one is reused.
			 */
			cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
			olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
			olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
		}

		m_seg = tx_pkt;
		do {
			txd = &txr[tx_id];
			txn = &sw_ring[txe->next_id];

			if (txe->mbuf != NULL)
				rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = m_seg;

			/*
			 * Set up Transmit Data Descriptor.
			 */
			slen = m_seg->pkt.data_len;
			buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
			txd->read.buffer_addr =
				rte_cpu_to_le_64(buf_dma_addr);
			txd->read.cmd_type_len =
				rte_cpu_to_le_32(cmd_type_len | slen);
			txd->read.olinfo_status =
				rte_cpu_to_le_32(olinfo_status);
			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
			m_seg = m_seg->pkt.next;
		} while (m_seg != NULL);

		/*
		 * The last packet data descriptor needs End Of Packet (EOP)
		 */
		cmd_type_len |= IXGBE_TXD_CMD_EOP;
		txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);

		/* Set RS bit only on threshold packets' last descriptor */
		if (txq->nb_tx_used >= txq->tx_rs_thresh) {
			PMD_TX_FREE_LOG(DEBUG,
					"Setting RS bit on TXD id="
					"%4u (port=%d queue=%d)",
					tx_last, txq->port_id, txq->queue_id);

			cmd_type_len |= IXGBE_TXD_CMD_RS;

			/* Update txq RS bit counters */
			txq->nb_tx_used = 0;
		}
		txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
	}
end_of_tx:
	rte_wmb();

	/*
	 * Set the Transmit Descriptor Tail (TDT)
	 */
	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
		   (unsigned) txq->port_id, (unsigned) txq->queue_id,
		   (unsigned) tx_id, (unsigned) nb_tx);
	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
	txq->tx_tail = tx_id;

	return nb_tx;
}
/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
static inline uint16_t
rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
{
	uint16_t pkt_flags;

	static uint16_t ip_pkt_types_map[16] = {
		0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
		PKT_RX_IPV6_HDR, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
	};

	static uint16_t ip_rss_types_map[16] = {
		0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
		0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
		PKT_RX_RSS_HASH, 0, 0, 0,
		0, 0, 0, PKT_RX_FDIR,
	};

#ifdef RTE_LIBRTE_IEEE1588
	static uint32_t ip_pkt_etqf_map[8] = {
		0, 0, 0, PKT_RX_IEEE1588_PTP,
		0, 0, 0, 0,
	};

	pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
				ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#else
	pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#endif
	return (uint16_t)(pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
}
static inline uint16_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
	uint16_t pkt_flags;

	/*
	 * Check only whether a VLAN is present.
	 * Whether the NIC performed the L3/L4 RX checksum is not checked
	 * here; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
	 */
	pkt_flags = (uint16_t)((rx_status & IXGBE_RXD_STAT_VP) ?
						PKT_RX_VLAN_PKT : 0);

#ifdef RTE_LIBRTE_IEEE1588
	if (rx_status & IXGBE_RXD_STAT_TMST)
		pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
#endif
	return pkt_flags;
}
static inline uint16_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
	/*
	 * Bit 31: IPE, IPv4 checksum error
	 * Bit 30: L4I, L4 integrity error
	 */
	static uint16_t error_to_pkt_flags_map[4] = {
		0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
	};
	return error_to_pkt_flags_map[(rx_status >>
		IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
}
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
/*
 * LOOK_AHEAD defines how many desc statuses to check beyond the
 * current descriptor.
 * It must be a pound define for optimal performance.
 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
 * function only works with LOOK_AHEAD=8.
 */
#define LOOK_AHEAD 8
#if (LOOK_AHEAD != 8)
#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
#endif
static inline int
ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *rxep;
	struct rte_mbuf *mb;
	uint16_t pkt_len;
	int s[LOOK_AHEAD], nb_dd;
	int i, j, nb_rx = 0;

	/* get references to current descriptor and S/W ring entry */
	rxdp = &rxq->rx_ring[rxq->rx_tail];
	rxep = &rxq->sw_ring[rxq->rx_tail];

	/* check to make sure there is at least 1 packet to receive */
	if (!(rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
		return 0;

	/*
	 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
	 * reference packets that are ready to be received.
	 */
	for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
	     i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
		/* Read desc statuses backwards to avoid race condition */
		for (j = LOOK_AHEAD-1; j >= 0; --j)
			s[j] = rxdp[j].wb.upper.status_error;

		/* Clear everything but the status bits (LSB) */
		for (j = 0; j < LOOK_AHEAD; ++j)
			s[j] &= IXGBE_RXDADV_STAT_DD;

		/* Compute how many status bits were set */
		nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
		nb_rx += nb_dd;

		/* Translate descriptor info to mbuf format */
		for (j = 0; j < nb_dd; ++j) {
			mb = rxep[j].mbuf;
			pkt_len = (uint16_t)(rxdp[j].wb.upper.length -
							rxq->crc_len);
			mb->pkt.data_len = pkt_len;
			mb->pkt.pkt_len = pkt_len;
			mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan;
			mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss;

			/* convert descriptor fields to rte mbuf flags */
			mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags(
					rxdp[j].wb.lower.lo_dword.data);
			/* reuse status field from scan list */
			mb->ol_flags |= rx_desc_status_to_pkt_flags(s[j]);
			mb->ol_flags |= rx_desc_error_to_pkt_flags(s[j]);
		}

		/* Move mbuf pointers from the S/W ring to the stage */
		for (j = 0; j < LOOK_AHEAD; ++j) {
			rxq->rx_stage[i + j] = rxep[j].mbuf;
		}

		/* stop if all requested packets could not be received */
		if (nb_dd != LOOK_AHEAD)
			break;
	}

	/* clear software ring entries so we can cleanup correctly */
	for (i = 0; i < nb_rx; ++i)
		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;

	return nb_rx;
}
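
/*
 * Worked example (sketch, not compiled): because IXGBE_RXDADV_STAT_DD is
 * bit 0, each masked status word is either 0 or 1. The hardware
 * completes descriptors in order and the statuses are read backwards,
 * so the set bits always form a prefix of s[]; the plain sum therefore
 * equals the length of the leading run of done descriptors. The
 * function name is hypothetical.
 */
#if 0
static int
example_count_dd(void)
{
	/* DD bits after masking: always a prefix of ones */
	int s[8] = { 1, 1, 1, 1, 1, 0, 0, 0 };

	return s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];	/* == 5 */
}
#endif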
static inline int
ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *rxep;
	struct rte_mbuf *mb;
	uint16_t alloc_idx;
	uint64_t dma_addr;
	int diag, i;

	/* allocate buffers in bulk directly into the S/W ring */
	alloc_idx = (uint16_t)(rxq->rx_free_trigger -
				(rxq->rx_free_thresh - 1));
	rxep = &rxq->sw_ring[alloc_idx];
	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
				    rxq->rx_free_thresh);
	if (unlikely(diag != 0))
		return -ENOMEM;

	rxdp = &rxq->rx_ring[alloc_idx];
	for (i = 0; i < rxq->rx_free_thresh; ++i) {
		/* populate the static rte mbuf fields */
		mb = rxep[i].mbuf;
		rte_mbuf_refcnt_set(mb, 1);
		mb->type = RTE_MBUF_PKT;
		mb->pkt.next = NULL;
		mb->pkt.data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
		mb->pkt.nb_segs = 1;
		mb->pkt.in_port = rxq->port_id;

		/* populate the descriptors */
		dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
		rxdp[i].read.hdr_addr = dma_addr;
		rxdp[i].read.pkt_addr = dma_addr;
	}

	/* update tail pointer */
	rte_wmb();
	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);

	/* update state of internal queue structure */
	rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger +
						rxq->rx_free_thresh);
	if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
		rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);

	/* no errors */
	return 0;
}
static inline uint16_t
ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
			 uint16_t nb_pkts)
{
	struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
	int i;

	/* how many packets are ready to return? */
	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);

	/* copy mbuf pointers to the application's packet list */
	for (i = 0; i < nb_pkts; ++i)
		rx_pkts[i] = stage[i];

	/* update internal queue state */
	rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
	rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);

	return nb_pkts;
}
static inline uint16_t
rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	     uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
	uint16_t nb_rx = 0;

	/* Any previously recv'd pkts will be returned from the Rx stage */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

	/* Scan the H/W ring for packets to receive */
	nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);

	/* update internal queue state */
	rxq->rx_next_avail = 0;
	rxq->rx_nb_avail = nb_rx;
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);

	/* if required, allocate new buffers to replenish descriptors */
	if (rxq->rx_tail > rxq->rx_free_trigger) {
		if (ixgbe_rx_alloc_bufs(rxq) != 0) {
			int i, j;
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);

			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
				rxq->rx_free_thresh;

			/*
			 * Need to rewind any previous receives if we cannot
			 * allocate new buffers to replenish the old ones.
			 */
			rxq->rx_nb_avail = 0;
			rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
			for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];

			return 0;
		}
	}

	if (rxq->rx_tail >= rxq->nb_rx_desc)
		rxq->rx_tail = 0;

	/* received any packets this loop? */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

	return 0;
}

/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
uint16_t
ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
			   uint16_t nb_pkts)
{
	uint16_t nb_rx;

	if (unlikely(nb_pkts == 0))
		return 0;

	if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
		return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);

	/* request is relatively large, chunk it up */
	nb_rx = 0;
	while (nb_pkts) {
		uint16_t ret, n;
		n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
		ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
		nb_rx = (uint16_t)(nb_rx + ret);
		nb_pkts = (uint16_t)(nb_pkts - ret);
		if (ret < n)
			break;
	}

	return nb_rx;
}
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
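
/*
 * Hypothetical application-side usage (sketch, not part of the driver):
 * callers see the same rte_eth_rx_burst() contract whether or not the
 * bulk-alloc path is compiled in. The function name and port/queue
 * numbers are examples only.
 */
#if 0
static void
app_poll_once(uint8_t port, uint16_t queue)
{
	struct rte_mbuf *pkts[64];
	uint16_t i, nb;

	nb = rte_eth_rx_burst(port, queue, pkts, 64);
	for (i = 0; i < nb; i++)
		rte_pktmbuf_free(pkts[i]);	/* application processing here */
}
#endif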
uint16_t
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union ixgbe_adv_rx_desc *rx_ring;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union ixgbe_adv_rx_desc rxd;
	uint64_t dma_addr;
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t pkt_len;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;
	while (nb_rx < nb_pkts) {
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * End of packet.
		 *
		 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
		 * is likely to be invalid and to be dropped by the various
		 * validation checks performed by the network stack.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the mean time.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "ext_err_stat=0x%08x pkt_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_ixgbe_prefetch(&rx_ring[rx_id]);
			rte_ixgbe_prefetch(&sw_ring[rx_id]);
		}

		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma_addr =
			rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.hdr_addr = dma_addr;
		rxdp->read.pkt_addr = dma_addr;

		/*
		 * Initialize the returned mbuf.
		 * 1) setup generic mbuf fields:
		 *    - number of segments,
		 *    - next segment,
		 *    - packet length,
		 *    - RX port identifier.
		 * 2) integrate hardware offload data, if any:
		 *    - RSS flag & hash,
		 *    - IP checksum flag,
		 *    - VLAN TCI, if any,
		 *    - error flags.
		 */
		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
				      rxq->crc_len);
		rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
		rte_packet_prefetch(rxm->pkt.data);
		rxm->pkt.nb_segs = 1;
		rxm->pkt.next = NULL;
		rxm->pkt.pkt_len = pkt_len;
		rxm->pkt.data_len = pkt_len;
		rxm->pkt.in_port = rxq->port_id;

		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		/* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
		rxm->pkt.vlan_macip.f.vlan_tci =
			rte_le_to_cpu_16(rxd.wb.upper.vlan);

		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		rxm->ol_flags = pkt_flags;

		if (likely(pkt_flags & PKT_RX_RSS_HASH))
			rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
		else if (pkt_flags & PKT_RX_FDIR) {
			rxm->pkt.hash.fdir.hash =
				(uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
					   & IXGBE_ATR_HASH_MASK);
			rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
		}

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = rxm;
	}
	rxq->rx_tail = rx_id;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
				    (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}
uint16_t
ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			  uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union ixgbe_adv_rx_desc *rx_ring;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *first_seg;
	struct rte_mbuf *last_seg;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union ixgbe_adv_rx_desc rxd;
	uint64_t dma; /* Physical address of mbuf data buffer */
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t data_len;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;

	/*
	 * Retrieve RX context of current packet, if any.
	 */
	first_seg = rxq->pkt_first_seg;
	last_seg = rxq->pkt_last_seg;

	while (nb_rx < nb_pkts) {
	next_desc:
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * Descriptor done.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the mean time.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x data_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_ixgbe_prefetch(&rx_ring[rx_id]);
			rte_ixgbe_prefetch(&sw_ring[rx_id]);
		}

		/*
		 * Update RX descriptor with the physical address of the new
		 * data buffer of the new allocated mbuf.
		 */
		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.hdr_addr = dma;
		rxdp->read.pkt_addr = dma;

		/*
		 * Set data length & data buffer address of mbuf.
		 */
		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
		rxm->pkt.data_len = data_len;
		rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (first_seg == NULL) {
			first_seg = rxm;
			first_seg->pkt.pkt_len = data_len;
			first_seg->pkt.nb_segs = 1;
		} else {
			first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
					+ data_len);
			first_seg->pkt.nb_segs++;
			last_seg->pkt.next = rxm;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (!(staterr & IXGBE_RXDADV_STAT_EOP)) {
			last_seg = rxm;
			goto next_desc;
		}

		/*
		 * This is the last buffer of the received packet.
		 * If the CRC is not stripped by the hardware:
		 *   - Subtract the CRC length from the total packet length.
		 *   - If the last buffer only contains the whole CRC or a part
		 *     of it, free the mbuf associated to the last buffer.
		 *     If part of the CRC is also contained in the previous
		 *     mbuf, subtract the length of that CRC part from the
		 *     data length of the previous mbuf.
		 */
		rxm->pkt.next = NULL;
		if (unlikely(rxq->crc_len > 0)) {
			first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
			if (data_len <= ETHER_CRC_LEN) {
				rte_pktmbuf_free_seg(rxm);
				first_seg->pkt.nb_segs--;
				last_seg->pkt.data_len = (uint16_t)
					(last_seg->pkt.data_len -
					 (ETHER_CRC_LEN - data_len));
				last_seg->pkt.next = NULL;
			} else
				rxm->pkt.data_len =
					(uint16_t) (data_len - ETHER_CRC_LEN);
		}

		/*
		 * Initialize the first mbuf of the returned packet:
		 *   - RX port identifier,
		 *   - hardware offload data, if any:
		 *     - RSS flag & hash,
		 *     - IP checksum flag,
		 *     - VLAN TCI, if any,
		 *     - error flags.
		 */
		first_seg->pkt.in_port = rxq->port_id;

		/*
		 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
		 * set in the pkt_flags field.
		 */
		first_seg->pkt.vlan_macip.f.vlan_tci =
				rte_le_to_cpu_16(rxd.wb.upper.vlan);
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		first_seg->ol_flags = pkt_flags;

		if (likely(pkt_flags & PKT_RX_RSS_HASH))
			first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
		else if (pkt_flags & PKT_RX_FDIR) {
			first_seg->pkt.hash.fdir.hash =
				(uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
					   & IXGBE_ATR_HASH_MASK);
			first_seg->pkt.hash.fdir.id =
				rxd.wb.lower.hi_dword.csum_ip.ip_id;
		}

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch(first_seg->pkt.data);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	/*
	 * Record index of the next RX descriptor to probe.
	 */
	rxq->rx_tail = rx_id;

	/*
	 * Save receive context.
	 */
	rxq->pkt_first_seg = first_seg;
	rxq->pkt_last_seg = last_seg;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
				    (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}
/*********************************************************************
 *
 *  Queue management functions
 *
 **********************************************************************/

/*
 * Rings setup and release.
 *
 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
 * also optimize cache line size effect. H/W supports up to cache line size 128.
 */
#define IXGBE_ALIGN 128

/*
 * Maximum number of Ring Descriptors.
 *
 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
 * descriptors should meet the following condition:
 *      (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
 */
#define IXGBE_MIN_RING_DESC 64
#define IXGBE_MAX_RING_DESC 4096
/*
 * Create memzone for HW rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, then this function returns a ptr
 * to the old one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
			dev->driver->pci_drv.name, ring_name,
			dev->data->port_id, queue_id);

	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, (uint64_t) ring_size,
			socket_id, 0, IXGBE_ALIGN);
}
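
/*
 * Illustrative sketch (not compiled): the name format above yields
 * something like "rte_ixgbe_pmd_tx_ring_0_2" for TX queue 2 of port 0
 * (the exact prefix comes from dev->driver->pci_drv.name; this name and
 * the function name below are examples only). A repeated setup call
 * therefore finds and reuses the zone instead of leaking it.
 */
#if 0
static const struct rte_memzone *
example_lookup_tx_ring_zone(void)
{
	/* a second setup call for the same queue finds the existing zone */
	return rte_memzone_lookup("rte_ixgbe_pmd_tx_ring_0_2");
}
#endif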
static void
ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
	unsigned i;

	if (txq->sw_ring != NULL) {
		for (i = 0; i < txq->nb_tx_desc; i++) {
			if (txq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
				txq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
ixgbe_tx_queue_release(struct igb_tx_queue *txq)
{
	if (txq != NULL) {
		ixgbe_tx_queue_release_mbufs(txq);
		rte_free(txq->sw_ring);
		rte_free(txq);
	}
}

void
ixgbe_dev_tx_queue_release(void *txq)
{
	ixgbe_tx_queue_release(txq);
}

/* (Re)set dynamic igb_tx_queue fields to defaults */
static void
ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
{
	struct igb_tx_entry *txe = txq->sw_ring;
	uint16_t prev, i;

	/* Zero out HW ring memory */
	for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
		((volatile char *)txq->tx_ring)[i] = 0;
	}

	/* Initialize SW ring entries */
	prev = (uint16_t) (txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
		txd->wb.status = IXGBE_TXD_STAT_DD;
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
	txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

	txq->tx_tail = 0;
	txq->nb_tx_used = 0;
	/*
	 * Always allow 1 descriptor to be un-allocated to avoid
	 * a H/W race condition
	 */
	txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
	txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
	txq->ctx_curr = 0;
	memset((void*)&txq->ctx_cache, 0,
		IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
}
int
ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	const struct rte_memzone *tz;
	struct igb_tx_queue *txq;
	struct ixgbe_hw *hw;
	uint16_t tx_rs_thresh, tx_free_thresh;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of transmit descriptors.
	 * It must not exceed hardware maximum, and must be a multiple
	 * of IXGBE_ALIGN.
	 */
	if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
	    (nb_desc > IXGBE_MAX_RING_DESC) ||
	    (nb_desc < IXGBE_MIN_RING_DESC)) {
		return -EINVAL;
	}

	/*
	 * The following two parameters control the setting of the RS bit on
	 * transmit descriptors.
	 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
	 * descriptors have been used.
	 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
	 * descriptors are used or if the number of descriptors required
	 * to transmit a packet is greater than the number of free TX
	 * descriptors.
	 * The following constraints must be satisfied:
	 *  - tx_rs_thresh must be greater than 0.
	 *  - tx_rs_thresh must be less than the size of the ring minus 2.
	 *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
	 *  - tx_rs_thresh must be a divisor of the ring size.
	 *  - tx_free_thresh must be greater than 0.
	 *  - tx_free_thresh must be less than the size of the ring minus 3.
	 * One descriptor in the TX ring is used as a sentinel to avoid a
	 * H/W race condition, hence the maximum threshold constraints.
	 * When set to zero use default values.
	 */
	tx_rs_thresh = (tx_conf->tx_rs_thresh) ?
			tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH;
	tx_free_thresh = (tx_conf->tx_free_thresh) ?
			tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
	if (tx_rs_thresh >= (nb_desc - 2)) {
		RTE_LOG(ERR, PMD,
			"tx_rs_thresh must be less than the "
			"number of TX descriptors minus 2. "
			"(tx_rs_thresh=%u port=%d queue=%d)\n",
			tx_rs_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}
	if (tx_free_thresh >= (nb_desc - 3)) {
		RTE_LOG(ERR, PMD,
			"tx_free_thresh must be less than the "
			"number of TX descriptors minus 3. "
			"(tx_free_thresh=%u port=%d queue=%d)\n",
			tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}
	if (tx_rs_thresh > tx_free_thresh) {
		RTE_LOG(ERR, PMD,
			"tx_rs_thresh must be less than or equal to "
			"tx_free_thresh. "
			"(tx_free_thresh=%u tx_rs_thresh=%u "
			"port=%d queue=%d)\n",
			tx_free_thresh, tx_rs_thresh,
			dev->data->port_id, queue_idx);
		return -EINVAL;
	}
	if ((nb_desc % tx_rs_thresh) != 0) {
		RTE_LOG(ERR, PMD,
			"tx_rs_thresh must be a divisor of the "
			"number of TX descriptors. "
			"(tx_rs_thresh=%u port=%d queue=%d)\n",
			tx_rs_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	/*
	 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
	 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
	 * by the NIC and all descriptors are written back after the NIC
	 * accumulates WTHRESH descriptors.
	 */
	if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
		RTE_LOG(ERR, PMD,
			"TX WTHRESH must be set to 0 if "
			"tx_rs_thresh is greater than 1. "
			"(tx_rs_thresh=%u port=%d queue=%d)\n",
			tx_rs_thresh,
			dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	/* Free memory prior to re-allocation if needed... */
	if (dev->data->tx_queues[queue_idx] != NULL)
		ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);

	/* First allocate the tx queue data structure */
	txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
			  CACHE_LINE_SIZE);
	if (txq == NULL)
		return -ENOMEM;

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
			sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
			socket_id);
	if (tz == NULL) {
		ixgbe_tx_queue_release(txq);
		return -ENOMEM;
	}

	txq->nb_tx_desc = nb_desc;
	txq->tx_rs_thresh = tx_rs_thresh;
	txq->tx_free_thresh = tx_free_thresh;
	txq->pthresh = tx_conf->tx_thresh.pthresh;
	txq->hthresh = tx_conf->tx_thresh.hthresh;
	txq->wthresh = tx_conf->tx_thresh.wthresh;
	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->txq_flags = tx_conf->txq_flags;

	/*
	 * Modification to set VFTDT for virtual function if vf is detected
	 */
	if (hw->mac.type == ixgbe_mac_82599_vf)
		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
	else
		txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));

	txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
	txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;

	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc("txq->sw_ring",
				   sizeof(struct igb_tx_entry) * nb_desc,
				   CACHE_LINE_SIZE);
	if (txq->sw_ring == NULL) {
		ixgbe_tx_queue_release(txq);
		return -ENOMEM;
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
		     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

	ixgbe_reset_tx_queue(txq);

	dev->data->tx_queues[queue_idx] = txq;

	/* Use a simple Tx queue (no offloads, no multi segs) if possible */
	if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
	    (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST))
		dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
	else
		dev->tx_pkt_burst = ixgbe_xmit_pkts;

	return 0;
}
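
/*
 * Worked example (sketch, not compiled): with nb_desc = 512 and the
 * defaults tx_rs_thresh = tx_free_thresh = 32, every constraint above
 * holds (32 < 510, 32 < 509, 32 <= 32, 512 % 32 == 0, wthresh == 0),
 * and txq_flags == IXGBE_SIMPLE_FLAGS selects the simple TX path. The
 * function name and all values are examples only.
 */
#if 0
static int
example_tx_queue_setup(struct rte_eth_dev *dev)
{
	struct rte_eth_txconf txc = {
		.tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
		.tx_rs_thresh = 32,	/* divides 512, <= tx_free_thresh */
		.tx_free_thresh = 32,	/* < 512 - 3 */
		.txq_flags = IXGBE_SIMPLE_FLAGS,
	};

	return ixgbe_dev_tx_queue_setup(dev, 0, 512, 0, &txc);
}
#endif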
static void
ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
{
	unsigned i;

	if (rxq->sw_ring != NULL) {
		for (i = 0; i < rxq->nb_rx_desc; i++) {
			if (rxq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
				rxq->sw_ring[i].mbuf = NULL;
			}
		}
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
		if (rxq->rx_nb_avail) {
			for (i = 0; i < rxq->rx_nb_avail; ++i) {
				struct rte_mbuf *mb;
				mb = rxq->rx_stage[rxq->rx_next_avail + i];
				rte_pktmbuf_free_seg(mb);
			}
			rxq->rx_nb_avail = 0;
		}
#endif
	}
}

static void
ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
{
	if (rxq != NULL) {
		ixgbe_rx_queue_release_mbufs(rxq);
		rte_free(rxq->sw_ring);
		rte_free(rxq);
	}
}

void
ixgbe_dev_rx_queue_release(void *rxq)
{
	ixgbe_rx_queue_release(rxq);
}
/*
 * Check if the Rx Burst Bulk Alloc function can be used.
 * Return
 *        0: the preconditions are satisfied and the bulk allocation function
 *           can be used.
 *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
 *           function must be used.
 */
static inline int
check_rx_burst_bulk_alloc_preconditions(__rte_unused struct igb_rx_queue *rxq)
{
	int ret = 0;

	/*
	 * Make sure the following pre-conditions are satisfied:
	 *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
	 *   rxq->rx_free_thresh < rxq->nb_rx_desc
	 *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
	 *   rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
	 * Scattered packets are not supported.  This should be checked
	 * outside of this function.
	 */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
	if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST))
		ret = -EINVAL;
	else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc))
		ret = -EINVAL;
	else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0))
		ret = -EINVAL;
	else if (!(rxq->nb_rx_desc <
		   (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)))
		ret = -EINVAL;
#else
	ret = -EINVAL;
#endif

	return ret;
}
2072 /* Reset dynamic igb_rx_queue fields back to defaults */
2074 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
2080 * By default, the Rx queue setup function allocates enough memory for
2081 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2082 * extra memory at the end of the descriptor ring to be zeroed out. A
2083 * pre-condition for using the Rx burst bulk alloc function is that the
2084 * number of descriptors is strictly less than
2085 * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2086 * constraints here to see if we need to zero out memory after the end
2087 * of the H/W descriptor ring.
2089 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2090 if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
2091 /* zero out extra memory */
2092 len = rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST;
2095 /* do not zero out extra memory */
2096 len = rxq->nb_rx_desc;
2099 * Zero out HW ring memory. Zero out extra memory at the end of
2100 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2101 * reads extra memory as zeros.
2103 for (i = 0; i < len * sizeof(union ixgbe_adv_rx_desc); i++) {
2104 ((volatile char *)rxq->rx_ring)[i] = 0;
2107 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2109 * initialize extra software ring entries. Space for these extra
2110 * entries is always allocated
2112 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2113 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
2114 rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
2117 rxq->rx_nb_avail = 0;
2118 rxq->rx_next_avail = 0;
2119 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
2120 #endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
2122 rxq->nb_rx_hold = 0;
2123 rxq->pkt_first_seg = NULL;
2124 rxq->pkt_last_seg = NULL;
2128 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2131 unsigned int socket_id,
2132 const struct rte_eth_rxconf *rx_conf,
2133 struct rte_mempool *mp)
2135 const struct rte_memzone *rz;
2136 struct igb_rx_queue *rxq;
2137 struct ixgbe_hw *hw;
2138 int use_def_burst_func = 1;
2141 PMD_INIT_FUNC_TRACE();
2142 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2145 * Validate number of receive descriptors.
2146 * It must not exceed hardware maximum, and must be multiple
2149 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
2150 (nb_desc > IXGBE_MAX_RING_DESC) ||
2151 (nb_desc < IXGBE_MIN_RING_DESC)) {
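/*
 * Worked example (illustrative): sizeof(union ixgbe_adv_rx_desc) is 16
 * bytes, so with IXGBE_ALIGN = 128 the first check requires nb_desc to
 * be a multiple of 8; e.g. nb_desc = 512 passes while nb_desc = 500
 * fails the alignment test.
 */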
2155 /* Free memory prior to re-allocation if needed... */
2156 if (dev->data->rx_queues[queue_idx] != NULL)
2157 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2159 /* First allocate the rx queue data structure */
2160 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
2165 rxq->nb_rx_desc = nb_desc;
2166 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2167 rxq->queue_id = queue_idx;
2168 rxq->port_id = dev->data->port_id;
2169 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
2171 rxq->drop_en = rx_conf->rx_drop_en;
2174 * Allocate RX ring hardware descriptors. A memzone large enough to
2175 * handle the maximum ring size is allocated in order to allow for
2176 * resizing in later calls to the queue setup function.
2178 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
2179 IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
2182 ixgbe_rx_queue_release(rxq);
2186 * Set the RX tail register address: use the VF-specific VFRDT when running as a virtual function
2188 if (hw->mac.type == ixgbe_mac_82599_vf)
2189 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2191 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));
2193 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
2194 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2197 * Allocate software ring. Allow for space at the end of the
2198 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2199 * function does not access an invalid memory region.
2201 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2202 len = nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST;
2206 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
2207 sizeof(struct igb_rx_entry) * len,
2209 if (rxq->sw_ring == NULL) {
2210 ixgbe_rx_queue_release(rxq);
2213 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
2214 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
2217 * Certain constraints must be met in order to use the bulk buffer
2218 * allocation Rx burst function.
2220 use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);
2222 /* Check if pre-conditions are satisfied, and no Scattered Rx */
2223 if (!use_def_burst_func && !dev->data->scattered_rx) {
2224 #ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
2225 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
2226 "satisfied. Rx Burst Bulk Alloc function will be "
2227 "used on port=%d, queue=%d.\n",
2228 rxq->port_id, rxq->queue_id);
2229 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
2232 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
2233 "are not satisfied, Scattered Rx is requested, "
2234 "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
2235 "enabled (port=%d, queue=%d).\n",
2236 rxq->port_id, rxq->queue_id);
2238 dev->data->rx_queues[queue_idx] = rxq;
2240 ixgbe_reset_rx_queue(rxq);
2246 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2250 PMD_INIT_FUNC_TRACE();
2252 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2253 struct igb_tx_queue *txq = dev->data->tx_queues[i];
2255 ixgbe_tx_queue_release_mbufs(txq);
2256 ixgbe_reset_tx_queue(txq);
2260 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2261 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
2263 ixgbe_rx_queue_release_mbufs(rxq);
2264 ixgbe_reset_rx_queue(rxq);
2269 /*********************************************************************
2271 * Device RX/TX init functions
2273 **********************************************************************/
2276 * Receive Side Scaling (RSS)
2277 * See section 7.1.2.8 in the following document:
2278 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2281 * The source and destination IP addresses of the IP header and the source
2282 * and destination ports of TCP/UDP headers, if any, of received packets are
2283 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2284 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2285 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2286 * RSS output index which is used as the RX queue index where to store the
2288 * The following output is supplied in the RX write-back descriptor:
2289 * - 32-bit result of the Microsoft RSS hash function,
2290 * - 4-bit RSS type field.
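/*
 * Illustrative sketch (not part of the original driver) of the queue
 * selection described above: the 7 least-significant bits of the RSS
 * hash index the 128-entry RETA, and the entry read out is used as
 * the RX queue number.
 */
static inline uint8_t
rss_hash_to_rx_queue(uint32_t rss_hash, const uint8_t reta[128])
{
	return reta[rss_hash & 0x7F]; /* 7 LSBs select the RETA entry */
}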
2294 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2295 * Used as the default key.
2297 static uint8_t rss_intel_key[40] = {
2298 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2299 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2300 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2301 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2302 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2306 ixgbe_rss_disable(struct rte_eth_dev *dev)
2308 struct ixgbe_hw *hw;
2311 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2312 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
2313 mrqc &= ~IXGBE_MRQC_RSSEN;
2314 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2318 ixgbe_rss_configure(struct rte_eth_dev *dev)
2320 struct ixgbe_hw *hw;
2329 PMD_INIT_FUNC_TRACE();
2330 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2332 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2333 if (rss_hf == 0) { /* Disable RSS */
2334 ixgbe_rss_disable(dev);
2337 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
2338 if (hash_key == NULL)
2339 hash_key = rss_intel_key; /* Default hash key */
2341 /* Fill in RSS hash key */
2342 for (i = 0; i < 10; i++) {
2343 rss_key = hash_key[(i * 4)];
2344 rss_key |= hash_key[(i * 4) + 1] << 8;
2345 rss_key |= hash_key[(i * 4) + 2] << 16;
2346 rss_key |= hash_key[(i * 4) + 3] << 24;
2347 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
2350 /* Fill in redirection table */
2352 for (i = 0, j = 0; i < 128; i++, j++) {
2353 if (j == dev->data->nb_rx_queues) j = 0;
2354 reta = (reta << 8) | j;
2356 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
2359 /* Set configured hashing functions in MRQC register */
2360 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
2361 if (rss_hf & ETH_RSS_IPV4)
2362 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2363 if (rss_hf & ETH_RSS_IPV4_TCP)
2364 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2365 if (rss_hf & ETH_RSS_IPV6)
2366 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2367 if (rss_hf & ETH_RSS_IPV6_EX)
2368 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2369 if (rss_hf & ETH_RSS_IPV6_TCP)
2370 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2371 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2372 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2373 if (rss_hf & ETH_RSS_IPV4_UDP)
2374 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2375 if (rss_hf & ETH_RSS_IPV6_UDP)
2376 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2377 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2378 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2379 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
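/*
 * Illustrative sketch (not part of the original driver) of the RETA
 * packing done in ixgbe_rss_configure() above: four 8-bit entries are
 * accumulated into one 32-bit word and byte-swapped before the write,
 * so the 128 entries fill IXGBE_RETA(0)..IXGBE_RETA(31) and entry
 * (4 * n) ends up in the least-significant byte of IXGBE_RETA(n).
 */
static inline uint32_t
ixgbe_reta_pack4(uint8_t e0, uint8_t e1, uint8_t e2, uint8_t e3)
{
	/* e0 is the lowest-numbered entry and lands in the LSB */
	return ((uint32_t)e3 << 24) | ((uint32_t)e2 << 16) |
	       ((uint32_t)e1 << 8) | (uint32_t)e0;
}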
2382 #define NUM_VFTA_REGISTERS 128
2383 #define NIC_RX_BUFFER_SIZE 0x200
2386 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2388 struct rte_eth_vmdq_dcb_conf *cfg;
2389 struct ixgbe_hw *hw;
2390 enum rte_eth_nb_pools num_pools;
2391 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2393 uint8_t nb_tcs; /* number of traffic classes */
2396 PMD_INIT_FUNC_TRACE();
2397 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2398 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2399 num_pools = cfg->nb_queue_pools;
2400 /* Check we have a valid number of pools */
2401 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2402 ixgbe_rss_disable(dev);
2405 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2406 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
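/*
 * Worked example (illustrative, assuming ETH_VMDQ_DCB_NUM_QUEUES is
 * 128): 16 pools yield 128 / 16 = 8 traffic classes and 32 pools
 * yield 128 / 32 = 4, matching the comment above.
 */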
2410 * split rx buffer up into sections, each for 1 traffic class
2412 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2413 for (i = 0; i < nb_tcs; i++) {
2414 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2415 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2416 /* clear 10 bits. */
2417 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2418 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2420 /* zero alloc all unused TCs */
2421 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2422 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2423 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
2424 /* clear 10 bits. */
2425 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
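/*
 * Worked example (illustrative): RXPBSIZE is programmed in KB, so with
 * NIC_RX_BUFFER_SIZE = 0x200 (512 KB) and 8 TCs each enabled TC gets
 * 512 / 8 = 64 KB of receive packet buffer; with 4 TCs, 128 KB each.
 * TCs beyond nb_tcs are given a zero-sized buffer by the loop above.
 */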
2428 /* MRQC: enable vmdq and dcb */
2429 mrqc = (num_pools == ETH_16_POOLS) ?
2430 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
2431 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2433 /* PFVTCTL: turn on virtualisation and set the default pool */
2434 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2435 if (cfg->enable_default_pool) {
2436 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2438 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2440 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2442 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2444 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2446 * mapping is done with 3 bits per priority,
2447 * so shift by i*3 each time
2449 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2451 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
2453 /* RTRPCS: DCB related */
2454 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2456 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2457 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2458 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
2459 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2461 /* VFTA - enable all vlan filters */
2462 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2463 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2466 /* VFRE: pool enabling for receive - 16 or 32 */
2467 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
2468 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2471 * MPSAR - allow pools to read specific mac addresses
2472 * In this case, all pools should be able to read from mac addr 0
2474 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2475 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2477 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2478 for (i = 0; i < cfg->nb_pool_maps; i++) {
2479 /* set vlan id in VF register and set the valid bit */
2480 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
2481 (cfg->pool_map[i].vlan_id & 0xFFF)));
2483 * Put the allowed pools in VFB reg. As we only have 16 or 32
2484 * pools, we only need to use the first half of the register
2487 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2), cfg->pool_map[i].pools);
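/*
 * Illustrative sketch (not part of the original driver) of the
 * RTRUP2TC packing used in the priority-mapping loop above: each of
 * the eight user priorities occupies a 3-bit TC field, so priority p
 * maps to bits [3p+2:3p] of the register.
 */
static inline uint32_t
ixgbe_up2tc_pack(const uint8_t tc_of_up[8])
{
	uint32_t reg = 0;
	int p;

	for (p = 0; p < 8; p++)
		reg |= (uint32_t)(tc_of_up[p] & 0x07) << (p * 3);
	return reg;
}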
2492 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
2493 * @hw: pointer to hardware structure
2494 * @dcb_config: pointer to ixgbe_dcb_config structure
2497 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
2498 struct ixgbe_dcb_config *dcb_config)
2503 PMD_INIT_FUNC_TRACE();
2504 if (hw->mac.type != ixgbe_mac_82598EB) {
2505 /* Disable the Tx desc arbiter so that MTQC can be changed */
2506 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2507 reg |= IXGBE_RTTDCS_ARBDIS;
2508 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2510 /* Enable DCB for Tx with 8 TCs */
2511 if (dcb_config->num_tcs.pg_tcs == 8) {
2512 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
2515 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
2517 if (dcb_config->vt_mode)
2518 reg |= IXGBE_MTQC_VT_ENA;
2519 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
2521 /* Disable drop for all queues */
2522 for (q = 0; q < 128; q++)
2523 IXGBE_WRITE_REG(hw, IXGBE_QDE,
2524 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
2526 /* Enable the Tx desc arbiter */
2527 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2528 reg &= ~IXGBE_RTTDCS_ARBDIS;
2529 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
2531 /* Enable Security TX Buffer IFG for DCB */
2532 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
2533 reg |= IXGBE_SECTX_DCB;
2534 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
2540 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2541 * @dev: pointer to rte_eth_dev structure
2542 * @dcb_config: pointer to ixgbe_dcb_config structure
2545 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2546 struct ixgbe_dcb_config *dcb_config)
2548 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2549 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2550 struct ixgbe_hw *hw =
2551 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2553 PMD_INIT_FUNC_TRACE();
2554 if (hw->mac.type != ixgbe_mac_82598EB)
2555 /* PF VF Transmit Enable */
2556 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
2557 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2559 /* Configure general DCB TX parameters */
2560 ixgbe_dcb_tx_hw_config(hw, dcb_config);
2565 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2566 struct ixgbe_dcb_config *dcb_config)
2568 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
2569 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2570 struct ixgbe_dcb_tc_config *tc;
2573 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
2574 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
2575 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2576 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2579 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2580 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2582 /* User Priority to Traffic Class mapping */
2583 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2584 j = vmdq_rx_conf->dcb_queue[i];
2585 tc = &dcb_config->tc_config[j];
2586 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = (1 << j);
2591 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
2592 struct ixgbe_dcb_config *dcb_config)
2594 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2595 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2596 struct ixgbe_dcb_tc_config *tc;
2599 /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
2600 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
2601 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2602 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2605 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2606 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2609 /* User Priority to Traffic Class mapping */
2610 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2611 j = vmdq_tx_conf->dcb_queue[i];
2612 tc = &dcb_config->tc_config[j];
2613 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = (1 << j);
2619 ixgbe_dcb_rx_config(struct rte_eth_dev *dev, struct ixgbe_dcb_config *dcb_config)
2621 struct rte_eth_dcb_rx_conf *rx_conf =
2622 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
2623 struct ixgbe_dcb_tc_config *tc;
2626 dcb_config->num_tcs.pg_tcs = rx_conf->nb_tcs;
2627 dcb_config->num_tcs.pfc_tcs = rx_conf->nb_tcs;
2629 /* User Priority to Traffic Class mapping */
2630 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2631 j = rx_conf->dcb_queue[i];
2632 tc = &dcb_config->tc_config[j];
2633 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = (1 << j);
2638 ixgbe_dcb_tx_config(struct rte_eth_dev *dev, struct ixgbe_dcb_config *dcb_config)
2640 struct rte_eth_dcb_tx_conf *tx_conf =
2641 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
2642 struct ixgbe_dcb_tc_config *tc;
2645 dcb_config->num_tcs.pg_tcs = tx_conf->nb_tcs;
2646 dcb_config->num_tcs.pfc_tcs = tx_conf->nb_tcs;
2648 /* User Priority to Traffic Class mapping */
2649 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2650 j = tx_conf->dcb_queue[i];
2651 tc = &dcb_config->tc_config[j];
2652 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = (1 << j);
2657 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
2658 * @hw: pointer to hardware structure
2659 * @dcb_config: pointer to ixgbe_dcb_config structure
2662 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
2663 struct ixgbe_dcb_config *dcb_config)
2669 PMD_INIT_FUNC_TRACE();
2671 * Disable the arbiter before changing parameters
2672 * (always enable recycle mode; WSP)
2674 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
2675 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2677 if (hw->mac.type != ixgbe_mac_82598EB) {
2678 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
2679 if (dcb_config->num_tcs.pg_tcs == 4) {
2680 if (dcb_config->vt_mode)
2681 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2682 IXGBE_MRQC_VMDQRT4TCEN;
2684 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2685 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2689 if (dcb_config->num_tcs.pg_tcs == 8) {
2690 if (dcb_config->vt_mode)
2691 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2692 IXGBE_MRQC_VMDQRT8TCEN;
2694 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
2695 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
2700 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
2703 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2704 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2705 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
2706 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2708 /* VFTA - enable all vlan filters */
2709 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2710 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2714 * Configure Rx packet plane (recycle mode; WSP) and
2717 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
2718 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
2724 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
2725 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2727 switch (hw->mac.type) {
2728 case ixgbe_mac_82598EB:
2729 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
2731 case ixgbe_mac_82599EB:
2732 case ixgbe_mac_X540:
2733 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
2742 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
2743 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
2745 switch (hw->mac.type) {
2746 case ixgbe_mac_82598EB:
2747 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
2748 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
2750 case ixgbe_mac_82599EB:
2751 case ixgbe_mac_X540:
2752 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
2753 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
2760 #define DCB_RX_CONFIG 1
2761 #define DCB_TX_CONFIG 1
2762 #define DCB_TX_PB 1024
2764 * ixgbe_dcb_hw_configure - Enable DCB and configure
2765 * general DCB in VT mode and non-VT mode parameters
2766 * @dev: pointer to rte_eth_dev structure
2767 * @dcb_config: pointer to ixgbe_dcb_config structure
2770 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
2771 struct ixgbe_dcb_config *dcb_config)
2774 uint8_t i, pfc_en, nb_tcs;
2776 uint8_t config_dcb_rx = 0;
2777 uint8_t config_dcb_tx = 0;
2778 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2779 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2780 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2781 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2782 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
2783 struct ixgbe_dcb_tc_config *tc;
2784 uint32_t max_frame = dev->data->max_frame_size;
2785 struct ixgbe_hw *hw =
2786 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2788 switch (dev->data->dev_conf.rxmode.mq_mode) {
2790 dcb_config->vt_mode = true;
2791 if (hw->mac.type != ixgbe_mac_82598EB) {
2792 config_dcb_rx = DCB_RX_CONFIG;
2794 * Get DCB and VT RX configuration parameters
2797 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
2798 /* Configure general VMDQ and DCB RX parameters */
2799 ixgbe_vmdq_dcb_configure(dev);
2803 dcb_config->vt_mode = false;
2804 config_dcb_rx = DCB_RX_CONFIG;
2805 /* Get DCB RX configuration parameters from rte_eth_conf */
2806 ixgbe_dcb_rx_config(dev, dcb_config);
2807 /* Configure general DCB RX parameters */
2808 ixgbe_dcb_rx_hw_config(hw, dcb_config);
2811 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration\n");
2814 switch (dev->data->dev_conf.txmode.mq_mode) {
2815 case ETH_VMDQ_DCB_TX:
2816 dcb_config->vt_mode = true;
2817 config_dcb_tx = DCB_TX_CONFIG;
2818 /* Get DCB and VT TX configuration parameters from rte_eth_conf */
2819 ixgbe_dcb_vt_tx_config(dev, dcb_config);
2820 /* Configure general VMDQ and DCB TX parameters */
2821 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
2825 dcb_config->vt_mode = false;
2826 config_dcb_tx = DCB_TX_CONFIG;
2827 /* Get DCB TX configuration parameters from rte_eth_conf */
2828 ixgbe_dcb_tx_config(dev, dcb_config);
2829 /* Configure general DCB TX parameters */
2830 ixgbe_dcb_tx_hw_config(hw, dcb_config);
2833 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration\n");
2837 nb_tcs = dcb_config->num_tcs.pfc_tcs;
2839 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
2841 if (nb_tcs == ETH_4_TCS) {
2841 /* Avoid un-configured priority mapping to TC0 */
2843 uint8_t mask = 0xFF;
2844 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
2845 mask &= ~(1 << map[i]);
2846 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
2847 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
2851 /* Re-configure 4 TCs BW */
2852 for (i = 0; i < nb_tcs; i++) {
2853 tc = &dcb_config->tc_config[i];
2854 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / nb_tcs;
2855 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / nb_tcs;
2857 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
2858 tc = &dcb_config->tc_config[i];
2859 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
2860 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
2865 /* Set RX buffer size */
2866 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2867 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
2868 for (i = 0; i < nb_tcs; i++) {
2869 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2871 /* zero alloc all unused TCs */
2872 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2873 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
2877 /* Only support an equally distributed Tx packet buffer strategy. */
2878 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
2879 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
2880 for (i = 0; i < nb_tcs; i++) {
2881 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
2882 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
2884 /* Clear unused TCs, if any, to zero buffer size*/
2885 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2886 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
2887 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
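/*
 * Worked example (illustrative, assuming IXGBE_TXPBSIZE_MAX = 0xA0000
 * bytes (640 KB) and IXGBE_TXPKT_SIZE_MAX = 0xA): with 8 TCs,
 * txpktsize = 640 KB / 8 = 80 KB per TC and txpbthresh =
 * (81920 / 1024) - 10 = 70, i.e. the threshold is the TC's buffer
 * size in KB minus the maximum packet size.
 */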
2891 /* Calculate traffic class credits */
2892 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
2893 IXGBE_DCB_TX_CONFIG);
2894 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
2895 IXGBE_DCB_RX_CONFIG);
2898 /* Unpack CEE standard containers */
2899 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
2900 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2901 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
2902 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
2903 /* Configure PG(ETS) RX */
2904 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
2908 /* Unpack CEE standard containers */
2909 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
2910 ixgbe_dcb_unpack_max_cee(dcb_config, max);
2911 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
2912 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
2913 /* Configure PG(ETS) TX */
2914 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
2917 /* Configure queue statistics registers */
2918 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
2920 /* Check whether PFC is supported */
2921 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
2922 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
2923 for (i = 0; i < nb_tcs; i++) {
2925 * E.g. with 8 TCs pbsize is 64, so high_water defaults
2926 * to 48 (3/4 of pbsize) and low_water to 16 (1/4).
2928 hw->fc.high_water[i] = (pbsize * 3) / 4;
2929 hw->fc.low_water[i] = pbsize / 4;
2930 /* Enable PFC for this TC */
2931 tc = &dcb_config->tc_config[i];
2932 tc->pfc = ixgbe_dcb_pfc_enabled;
2934 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
2935 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
2937 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
2944 * ixgbe_configure_dcb - Configure DCB Hardware
2945 * @dev: pointer to rte_eth_dev
2947 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
2949 struct ixgbe_dcb_config *dcb_cfg =
2950 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
2952 PMD_INIT_FUNC_TRACE();
2953 /* Configure DCB hardware */
2954 if (((dev->data->dev_conf.rxmode.mq_mode != ETH_RSS) &&
2955 (dev->data->nb_rx_queues == ETH_DCB_NUM_QUEUES)) ||
2956 ((dev->data->dev_conf.txmode.mq_mode != ETH_DCB_NONE) &&
2957 (dev->data->nb_tx_queues == ETH_DCB_NUM_QUEUES))) {
2958 ixgbe_dcb_hw_configure(dev, dcb_cfg);
2964 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2966 struct igb_rx_entry *rxe = rxq->sw_ring;
2970 /* Initialize software ring entries */
2971 for (i = 0; i < rxq->nb_rx_desc; i++) {
2972 volatile union ixgbe_adv_rx_desc *rxd;
2973 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
2975 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
2976 (unsigned) rxq->queue_id);
2980 rte_mbuf_refcnt_set(mbuf, 1);
2981 mbuf->type = RTE_MBUF_PKT;
2982 mbuf->pkt.next = NULL;
2983 mbuf->pkt.data = (char *)mbuf->buf_addr + RTE_PKTMBUF_HEADROOM;
2984 mbuf->pkt.nb_segs = 1;
2985 mbuf->pkt.in_port = rxq->port_id;
2988 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
2989 rxd = &rxq->rx_ring[i];
2990 rxd->read.hdr_addr = dma_addr;
2991 rxd->read.pkt_addr = dma_addr;
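/*
 * Note (illustrative): in advanced one-buffer mode the header and
 * packet address fields of each descriptor are programmed with the
 * same DMA address; no header split is configured on this path, so
 * the hardware DMAs the whole frame to the packet buffer.
 */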
2999 * Initializes Receive Unit.
3002 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
3004 struct ixgbe_hw *hw;
3005 struct igb_rx_queue *rxq;
3006 struct rte_pktmbuf_pool_private *mbp_priv;
3019 PMD_INIT_FUNC_TRACE();
3020 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3023 * Make sure receives are disabled while setting
3024 * up the RX context (registers, descriptor rings, etc.).
3026 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3027 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
3029 /* Enable receipt of broadcast frames */
3030 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
3031 fctrl |= IXGBE_FCTRL_BAM;
3032 fctrl |= IXGBE_FCTRL_DPF;
3033 fctrl |= IXGBE_FCTRL_PMCF;
3034 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
3037 * Configure CRC stripping, if any.
3039 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3040 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3041 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
3043 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
3046 * Configure jumbo frame support, if any.
3048 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
3049 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
3050 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
3051 maxfrs &= 0x0000FFFF;
3052 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
3053 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
3055 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
3057 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3059 /* Setup RX queues */
3060 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3061 rxq = dev->data->rx_queues[i];
3063 /* Allocate buffers for descriptor rings */
3064 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3069 * Reset crc_len in case it was changed after queue setup by a
3070 * call to configure.
3072 rxq->crc_len = (uint8_t)
3073 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
3076 /* Setup the Base and Length of the Rx Descriptor Rings */
3077 bus_addr = rxq->rx_ring_phys_addr;
3078 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
3079 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3080 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
3081 (uint32_t)(bus_addr >> 32));
3082 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
3083 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3084 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
3085 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
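/*
 * Note (illustrative): programming RDH = RDT = 0 presents an empty
 * ring to the hardware; reception only starts once
 * ixgbe_dev_rxtx_start() advances RDT to nb_rx_desc - 1 after the
 * descriptors have been populated with mbufs.
 */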
3087 /* Configure the SRRCTL register */
3088 #ifdef RTE_HEADER_SPLIT_ENABLE
3090 * Configure Header Split
3092 if (dev->data->dev_conf.rxmode.header_split) {
3093 if (hw->mac.type == ixgbe_mac_82599EB) {
3094 /* Must setup the PSRTYPE register */
3096 psrtype = IXGBE_PSRTYPE_TCPHDR |
3097 IXGBE_PSRTYPE_UDPHDR |
3098 IXGBE_PSRTYPE_IPV4HDR |
3099 IXGBE_PSRTYPE_IPV6HDR;
3100 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
3102 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3103 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3104 IXGBE_SRRCTL_BSIZEHDR_MASK);
3105 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3108 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3110 /* Set if packets are dropped when no descriptors available */
3112 srrctl |= IXGBE_SRRCTL_DROP_EN;
3115 * Configure the RX buffer size in the BSIZEPACKET field of
3116 * the SRRCTL register of the queue.
3117 * The value is in 1 KB resolution. Valid values can be from
3120 mbp_priv = (struct rte_pktmbuf_pool_private *)
3121 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3122 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3123 RTE_PKTMBUF_HEADROOM);
3124 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3125 IXGBE_SRRCTL_BSIZEPKT_MASK);
3126 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
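/*
 * Worked example (illustrative): for a mempool whose data room is
 * 2048 + RTE_PKTMBUF_HEADROOM bytes, buf_size is 2048, and
 * 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (a shift of 10) programs a
 * 2 KB receive buffer into the BSIZEPACKET field.
 */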
3128 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3129 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3130 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
3131 IXGBE_RX_BUF_THRESHOLD > buf_size) {
3132 dev->data->scattered_rx = 1;
3133 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3138 * Configure RSS if device configured with multiple RX queues.
3140 if (hw->mac.type == ixgbe_mac_82599EB) {
3141 if (dev->data->nb_rx_queues > 1)
3142 switch (dev->data->dev_conf.rxmode.mq_mode) {
3144 ixgbe_rss_configure(dev);
3148 ixgbe_vmdq_dcb_configure(dev);
3151 default: ixgbe_rss_disable(dev);
3154 ixgbe_rss_disable(dev);
3158 * Setup the Checksum Register.
3159 * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
3160 * Enable IP/L4 checksum computation by hardware if requested to do so.
3162 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
3163 rxcsum |= IXGBE_RXCSUM_PCSD;
3164 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
3165 rxcsum |= IXGBE_RXCSUM_IPPCSE;
3167 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
3169 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
3171 if (hw->mac.type == ixgbe_mac_82599EB) {
3172 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3173 if (dev->data->dev_conf.rxmode.hw_strip_crc)
3174 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3176 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
3177 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3178 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3185 * Initializes Transmit Unit.
3188 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
3190 struct ixgbe_hw *hw;
3191 struct igb_tx_queue *txq;
3198 PMD_INIT_FUNC_TRACE();
3199 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3201 /* Enable TX CRC (checksum offload requirement) */
3202 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
3203 hlreg0 |= IXGBE_HLREG0_TXCRCEN;
3204 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
3206 /* Setup the Base and Length of the Tx Descriptor Rings */
3207 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3208 txq = dev->data->tx_queues[i];
3210 bus_addr = txq->tx_ring_phys_addr;
3211 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3212 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3213 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
3214 (uint32_t)(bus_addr >> 32));
3215 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3216 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3217 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3218 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3219 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3222 * Disable Tx Head Writeback RO bit, since this hoses
3223 * bookkeeping if things aren't delivered in order.
3225 switch (hw->mac.type) {
3226 case ixgbe_mac_82598EB:
3227 txctrl = IXGBE_READ_REG(hw,
3228 IXGBE_DCA_TXCTRL(i));
3229 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3230 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i),
3234 case ixgbe_mac_82599EB:
3235 case ixgbe_mac_X540:
3237 txctrl = IXGBE_READ_REG(hw,
3238 IXGBE_DCA_TXCTRL_82599(i));
3239 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3240 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
3246 if (hw->mac.type != ixgbe_mac_82598EB) {
3247 /* disable arbiter before setting MTQC */
3248 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3249 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3250 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3252 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3254 /* re-enable arbiter */
3255 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3256 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3261 * Start Transmit and Receive Units.
3264 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
3266 struct ixgbe_hw *hw;
3267 struct igb_tx_queue *txq;
3268 struct igb_rx_queue *rxq;
3276 PMD_INIT_FUNC_TRACE();
3277 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3279 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3280 txq = dev->data->tx_queues[i];
3281 /* Setup Transmit Threshold Registers */
3282 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3283 txdctl |= txq->pthresh & 0x7F;
3284 txdctl |= ((txq->hthresh & 0x7F) << 8);
3285 txdctl |= ((txq->wthresh & 0x7F) << 16);
3286 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
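/*
 * Illustrative layout of the fields packed above: TXDCTL holds
 * PTHRESH in bits [6:0], HTHRESH in bits [14:8] and WTHRESH in
 * bits [22:16]; e.g. pthresh = 32 with zero hthresh/wthresh yields
 * a TXDCTL value of 0x00000020 (before the ENABLE bit is set).
 */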
3289 if (hw->mac.type != ixgbe_mac_82598EB) {
3290 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3291 dmatxctl |= IXGBE_DMATXCTL_TE;
3292 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3295 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3296 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3297 txdctl |= IXGBE_TXDCTL_ENABLE;
3298 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
3300 /* Wait until TX Enable ready */
3301 if (hw->mac.type == ixgbe_mac_82599EB) {
3305 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
3306 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3308 PMD_INIT_LOG(ERR, "Could not enable "
3309 "Tx Queue %d\n", i);
3312 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3313 rxq = dev->data->rx_queues[i];
3314 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3315 rxdctl |= IXGBE_RXDCTL_ENABLE;
3316 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
3318 /* Wait until RX Enable ready */
3322 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3323 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3325 PMD_INIT_LOG(ERR, "Could not enable "
3326 "Rx Queue %d\n", i);
3328 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
3331 /* Enable Receive engine */
3332 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3333 if (hw->mac.type == ixgbe_mac_82598EB)
3334 rxctrl |= IXGBE_RXCTRL_DMBYPS;
3335 rxctrl |= IXGBE_RXCTRL_RXEN;
3336 hw->mac.ops.enable_rx_dma(hw, rxctrl);
3341 * [VF] Initializes Receive Unit.
3344 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
3346 struct ixgbe_hw *hw;
3347 struct igb_rx_queue *rxq;
3348 struct rte_pktmbuf_pool_private *mbp_priv;
3355 PMD_INIT_FUNC_TRACE();
3356 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3358 /* Setup RX queues */
3359 dev->rx_pkt_burst = ixgbe_recv_pkts;
3360 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3361 rxq = dev->data->rx_queues[i];
3363 /* Allocate buffers for descriptor rings */
3364 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
3368 /* Setup the Base and Length of the Rx Descriptor Rings */
3369 bus_addr = rxq->rx_ring_phys_addr;
3371 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
3372 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3373 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
3374 (uint32_t)(bus_addr >> 32));
3375 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
3376 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
3377 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
3378 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
3381 /* Configure the SRRCTL register */
3382 #ifdef RTE_HEADER_SPLIT_ENABLE
3384 * Configure Header Split
3386 if (dev->data->dev_conf.rxmode.header_split) {
3388 /* Must setup the PSRTYPE register */
3390 psrtype = IXGBE_PSRTYPE_TCPHDR |
3391 IXGBE_PSRTYPE_UDPHDR |
3392 IXGBE_PSRTYPE_IPV4HDR |
3393 IXGBE_PSRTYPE_IPV6HDR;
3395 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
3397 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
3398 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
3399 IXGBE_SRRCTL_BSIZEHDR_MASK);
3400 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3403 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
3405 /* Set if packets are dropped when no descriptors available */
3407 srrctl |= IXGBE_SRRCTL_DROP_EN;
3410 * Configure the RX buffer size in the BSIZEPACKET field of
3411 * the SRRCTL register of the queue.
3412 * The value is in 1 KB resolution. Valid values can be from
3415 mbp_priv = (struct rte_pktmbuf_pool_private *)
3416 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
3417 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
3418 RTE_PKTMBUF_HEADROOM);
3419 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
3420 IXGBE_SRRCTL_BSIZEPKT_MASK);
3423 * VF modification to write virtual function SRRCTL register
3425 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
3427 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
3428 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
3429 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
3430 dev->data->scattered_rx = 1;
3431 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
3439 * [VF] Initializes Transmit Unit.
3442 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
3444 struct ixgbe_hw *hw;
3445 struct igb_tx_queue *txq;
3450 PMD_INIT_FUNC_TRACE();
3451 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3453 /* Setup the Base and Length of the Tx Descriptor Rings */
3454 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3455 txq = dev->data->tx_queues[i];
3456 bus_addr = txq->tx_ring_phys_addr;
3457 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
3458 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
3459 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
3460 (uint32_t)(bus_addr >> 32));
3461 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
3462 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3463 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3464 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
3465 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
3468 * Disable Tx Head Writeback RO bit, since this hoses
3469 * bookkeeping if things aren't delivered in order.
3471 txctrl = IXGBE_READ_REG(hw,
3472 IXGBE_VFDCA_TXCTRL(i));
3473 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3474 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
3480 * [VF] Start Transmit and Receive Units.
3483 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
3485 struct ixgbe_hw *hw;
3486 struct igb_tx_queue *txq;
3487 struct igb_rx_queue *rxq;
3493 PMD_INIT_FUNC_TRACE();
3494 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3496 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3497 txq = dev->data->tx_queues[i];
3498 /* Setup Transmit Threshold Registers */
3499 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3500 txdctl |= txq->pthresh & 0x7F;
3501 txdctl |= ((txq->hthresh & 0x7F) << 8);
3502 txdctl |= ((txq->wthresh & 0x7F) << 16);
3503 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3506 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3508 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3509 txdctl |= IXGBE_TXDCTL_ENABLE;
3510 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
3513 /* Wait until TX Enable ready */
3516 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
3517 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
3519 PMD_INIT_LOG(ERR, "Could not enable "
3520 "Tx Queue %d\n", i);
3522 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3524 rxq = dev->data->rx_queues[i];
3526 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3527 rxdctl |= IXGBE_RXDCTL_ENABLE;
3528 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
3530 /* Wait until RX Enable ready */
3534 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
3535 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
3537 PMD_INIT_LOG(ERR, "Could not enable "
3538 "Rx Queue %d\n", i);
3540 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);