/*
 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "ixgbe_logs.h"
#include "ixgbe/ixgbe_api.h"
#include "ixgbe/ixgbe_vf.h"
#include "ixgbe_ethdev.h"

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#endif
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
    struct rte_mbuf *m;

    m = __rte_mbuf_raw_alloc(mp);
    __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
    return (m);
}

#define RTE_MBUF_DATA_DMA_ADDR(mb) \
    (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
    (char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
    (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
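/*
 * Illustrative example (editor's addition, not part of the upstream code):
 * for a freshly allocated mbuf whose data pointer still points at
 * buf_addr + RTE_PKTMBUF_HEADROOM, both macros evaluate to the same
 * physical address:
 *
 *    struct rte_mbuf *m = rte_rxmbuf_alloc(mp);
 *    uint64_t dma = RTE_MBUF_DATA_DMA_ADDR(m);
 *
 *    dma == (uint64_t)m->buf_physaddr + RTE_PKTMBUF_HEADROOM
 *        == RTE_MBUF_DATA_DMA_ADDR_DEFAULT(m)
 *
 * The two only diverge once the data pointer has been moved, e.g. by
 * rte_pktmbuf_prepend() or rte_pktmbuf_adj().
 */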
/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
    struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
    struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
    uint16_t next_id;      /**< Index of next descriptor in ring. */
    uint16_t last_id;      /**< Index of last scattered descriptor. */
};

/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
    struct rte_mempool  *mb_pool; /**< mbuf pool to populate RX ring. */
    volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
    uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
    volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
    struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
    struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
    struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
    uint16_t            nb_rx_desc; /**< number of RX descriptors. */
    uint16_t            rx_tail;    /**< current value of RDT register. */
    uint16_t            nb_rx_hold; /**< number of held free RX desc. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    uint16_t rx_nb_avail;     /**< number of staged packets ready to return to the app. */
    uint16_t rx_next_avail;   /**< index of the next staged packet to return to the app. */
    uint16_t rx_free_trigger; /**< triggers the RX buffer allocation. */
#endif
    uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
    uint16_t            queue_id; /**< RX queue index. */
    uint8_t             port_id;  /**< Device port identifier. */
    uint8_t             crc_len;  /**< 0 if CRC stripped, 4 otherwise. */
    uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    /** need to alloc dummy mbuf, for wraparound when scanning hw ring */
    struct rte_mbuf fake_mbuf;
    /** hold packets to return to application */
    struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2];
#endif
};
/**
 * IXGBE CTX Constants
 */
enum ixgbe_advctx_num {
    IXGBE_CTX_0   = 0, /**< CTX0 */
    IXGBE_CTX_1   = 1, /**< CTX1 */
    IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
};

/**
 * Structure used to check whether a new context descriptor needs to be built.
 */
struct ixgbe_advctx_info {
    uint16_t flags;           /**< ol_flags for context build. */
    uint32_t cmp_mask;        /**< compare mask for vlan_macip_lens */
    uint32_t vlan_macip_lens; /**< vlan, mac ip length. */
};

/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
    /** TX ring virtual address. */
    volatile union ixgbe_adv_tx_desc *tx_ring;
    uint64_t            tx_ring_phys_addr; /**< TX ring DMA address. */
    struct igb_tx_entry *sw_ring;       /**< virtual address of SW ring. */
    volatile uint32_t   *tdt_reg_addr;  /**< Address of TDT register. */
    uint16_t            nb_tx_desc;     /**< number of TX descriptors. */
    uint16_t            tx_tail;        /**< current value of TDT reg. */
    uint16_t            tx_free_thresh; /**< minimum TX before freeing. */
    /** Number of TX descriptors to use before RS bit is set. */
    uint16_t            tx_rs_thresh;
    /** Number of TX descriptors used since RS bit was set. */
    uint16_t            nb_tx_used;
    /** Index to last TX descriptor to have been cleaned. */
    uint16_t            last_desc_cleaned;
    /** Total number of TX descriptors ready to be allocated. */
    uint16_t            nb_tx_free;
    uint16_t            queue_id; /**< TX queue index. */
    uint8_t             port_id;  /**< Device port identifier. */
    uint8_t             pthresh;  /**< Prefetch threshold register. */
    uint8_t             hthresh;  /**< Host threshold register. */
    uint8_t             wthresh;  /**< Write-back threshold reg. */
    uint32_t            ctx_curr; /**< Hardware context states. */
    /** Hardware context history. */
    struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
};
#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while (0)
#endif

/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/
static inline void
ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
        volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
        uint16_t ol_flags, uint32_t vlan_macip_lens)
{
    uint32_t type_tucmd_mlhl;
    uint32_t mss_l4len_idx;
    uint32_t ctx_idx;
    uint32_t cmp_mask;

    ctx_idx = txq->ctx_curr;
    cmp_mask = 0;
    type_tucmd_mlhl = 0;

    if (ol_flags & PKT_TX_VLAN_PKT) {
        cmp_mask |= TX_VLAN_CMP_MASK;
    }

    if (ol_flags & PKT_TX_IP_CKSUM) {
        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
        cmp_mask |= TX_MAC_LEN_CMP_MASK;
    }

    /* Specify which HW CTX to upload. */
    mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
    switch (ol_flags & PKT_TX_L4_MASK) {
    case PKT_TX_UDP_CKSUM:
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
        mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
        cmp_mask |= TX_MACIP_LEN_CMP_MASK;
        break;
    case PKT_TX_TCP_CKSUM:
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
        mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
        cmp_mask |= TX_MACIP_LEN_CMP_MASK;
        break;
    case PKT_TX_SCTP_CKSUM:
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
        mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
        cmp_mask |= TX_MACIP_LEN_CMP_MASK;
        break;
    default:
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
        break;
    }

    txq->ctx_cache[ctx_idx].flags = ol_flags;
    txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
    txq->ctx_cache[ctx_idx].vlan_macip_lens = vlan_macip_lens & cmp_mask;

    ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
    ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
    ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
    ctx_txd->seqnum_seed     = 0;
}
/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
        uint32_t vlan_macip_lens)
{
    /* If it matches the context currently in use */
    if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
        (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
        (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
            return txq->ctx_curr;
    }

    /* Otherwise, check whether it matches the other context */
    txq->ctx_curr ^= 1;
    if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
        (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
        (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
            return txq->ctx_curr;
    }

    /* Mismatch: a new context descriptor must be built */
    return (IXGBE_CTX_NUM);
}

static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
{
    static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
    static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
    uint32_t tmp;

    tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
    tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
    return tmp;
}

static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
{
    static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
    return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
}
/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif
/* Reset transmit descriptors after they have been used */
static inline int
ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
{
    struct igb_tx_entry *sw_ring = txq->sw_ring;
    volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
    uint16_t last_desc_cleaned = txq->last_desc_cleaned;
    uint16_t nb_tx_desc = txq->nb_tx_desc;
    uint16_t desc_to_clean_to;
    uint16_t nb_tx_to_clean;

    /* Determine the last descriptor needing to be cleaned */
    desc_to_clean_to = last_desc_cleaned + txq->tx_rs_thresh;
    if (desc_to_clean_to >= nb_tx_desc)
        desc_to_clean_to = desc_to_clean_to - nb_tx_desc;

    /* Check to make sure the last descriptor to clean is done */
    desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
    if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD)) {
        PMD_TX_FREE_LOG(DEBUG,
                "TX descriptor %4u is not done "
                "(port=%d queue=%d)",
                desc_to_clean_to,
                txq->port_id, txq->queue_id);
        /* Failed to clean any descriptors, better luck next time */
        return -(1);
    }

    /* Figure out how many descriptors will be cleaned */
    if (last_desc_cleaned > desc_to_clean_to)
        nb_tx_to_clean = ((nb_tx_desc - last_desc_cleaned) +
                  desc_to_clean_to);
    else
        nb_tx_to_clean = desc_to_clean_to - last_desc_cleaned;

    PMD_TX_FREE_LOG(DEBUG,
            "Cleaning %4u TX descriptors: %4u to %4u "
            "(port=%d queue=%d)",
            nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
            txq->port_id, txq->queue_id);

    /*
     * The last descriptor to clean is done, so that means all the
     * descriptors from the last descriptor that was cleaned
     * up to the last descriptor with the RS bit set
     * are done. Only reset the threshold descriptor.
     */
    txr[desc_to_clean_to].wb.status = 0;

    /* Update the txq to reflect the last descriptor that was cleaned */
    txq->last_desc_cleaned = desc_to_clean_to;
    txq->nb_tx_free += nb_tx_to_clean;

    /* No Error */
    return (0);
}
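/*
 * Worked example (editor's note, not in the upstream source): with
 * nb_tx_desc = 512, tx_rs_thresh = 32 and last_desc_cleaned = 500,
 * desc_to_clean_to starts as 500 + 32 = 532, wraps to 532 - 512 = 20 and,
 * once the DD bit of that descriptor has been written back by the NIC,
 * the routine frees nb_tx_to_clean = (512 - 500) + 20 = 32 descriptors.
 */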
uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
{
    struct igb_tx_queue *txq;
    struct igb_tx_entry *sw_ring;
    struct igb_tx_entry *txe, *txn;
    volatile union ixgbe_adv_tx_desc *txr;
    volatile union ixgbe_adv_tx_desc *txd;
    struct rte_mbuf *tx_pkt;
    struct rte_mbuf *m_seg;
    uint64_t buf_dma_addr;
    uint32_t olinfo_status;
    uint32_t cmd_type_len;
    uint32_t pkt_len;
    uint32_t vlan_macip_lens;
    uint32_t ctx = 0;
    uint32_t new_ctx = 0;
    uint16_t slen, ol_flags, tx_ol_req;
    uint16_t tx_id, tx_last, nb_tx, nb_used;

    txq = tx_queue;
    sw_ring = txq->sw_ring;
    txr = txq->tx_ring;
    tx_id = txq->tx_tail;
    txe = &sw_ring[tx_id];

    /* Determine if the descriptor ring needs to be cleaned. */
    if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
        ixgbe_xmit_cleanup(txq);
    }
    /* TX loop */
    for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
        tx_pkt = *tx_pkts++;
        pkt_len = tx_pkt->pkt.pkt_len;

        RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

        /*
         * Determine how many (if any) context descriptors
         * are needed for offload functionality.
         */
        ol_flags = tx_pkt->ol_flags;
        vlan_macip_lens = tx_pkt->pkt.vlan_tci << 16 |
                tx_pkt->pkt.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT |
                tx_pkt->pkt.l3_len;

        /* If hardware offload required */
        tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
        if (tx_ol_req) {
            /* Reuse an existing context descriptor or build a new one. */
            ctx = what_advctx_update(txq, tx_ol_req, vlan_macip_lens);
            /* Only allocate a context descriptor if required */
            new_ctx = (ctx == IXGBE_CTX_NUM);
            ctx = txq->ctx_curr;
        }

        /*
         * Keep track of how many descriptors are used this loop.
         * This will always be the number of segments + the number of
         * context descriptors required to transmit the packet.
         */
        nb_used = tx_pkt->pkt.nb_segs + new_ctx;

        /*
         * The number of descriptors that must be allocated for a
         * packet is the number of segments of that packet, plus 1
         * Context Descriptor for the hardware offload, if any.
         * Determine the last TX descriptor to allocate in the TX ring
         * for the packet, starting from the current position (tx_id)
         * in the ring.
         */
        tx_last = (uint16_t) (tx_id + nb_used - 1);

        /* Circular ring */
        if (tx_last >= txq->nb_tx_desc)
            tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
               " tx_first=%u tx_last=%u\n",
               (unsigned) txq->port_id,
               (unsigned) txq->queue_id,
               (unsigned) pkt_len,
               (unsigned) tx_id,
               (unsigned) tx_last);

        /*
         * Make sure there are enough TX descriptors available to
         * transmit the entire packet.
         * nb_used better be less than or equal to txq->tx_rs_thresh.
         */
        if (nb_used > txq->nb_tx_free) {
            PMD_TX_FREE_LOG(DEBUG,
                    "Not enough free TX descriptors "
                    "nb_used=%4u nb_free=%4u "
                    "(port=%d queue=%d)",
                    nb_used, txq->nb_tx_free,
                    txq->port_id, txq->queue_id);

            if (ixgbe_xmit_cleanup(txq) != 0) {
                /* Could not clean any descriptors */
                if (nb_tx == 0)
                    return (0);
                goto end_of_tx;
            }

            /* nb_used better be <= txq->tx_rs_thresh */
            if (unlikely(nb_used > txq->tx_rs_thresh)) {
                PMD_TX_FREE_LOG(DEBUG,
                    "The number of descriptors needed to "
                    "transmit the packet exceeds the "
                    "RS bit threshold. This will impact "
                    "performance. "
                    "nb_used=%4u nb_free=%4u "
                    "tx_rs_thresh=%4u "
                    "(port=%d queue=%d)",
                    nb_used, txq->nb_tx_free,
                    txq->tx_rs_thresh,
                    txq->port_id, txq->queue_id);
                /*
                 * Loop here until there are enough TX
                 * descriptors or until the ring cannot be
                 * cleaned any further.
                 */
                while (nb_used > txq->nb_tx_free) {
                    if (ixgbe_xmit_cleanup(txq) != 0) {
                        /* Could not clean any descriptors */
                        if (nb_tx == 0)
                            return (0);
                        goto end_of_tx;
                    }
                }
            }
        }
        /*
         * By now there are enough free TX descriptors to transmit
         * the packet.
         */

        /*
         * Set common flags of all TX Data Descriptors.
         *
         * The following bits must be set in all Data Descriptors:
         *   - IXGBE_ADVTXD_DTYP_DATA
         *   - IXGBE_ADVTXD_DCMD_DEXT
         *
         * The following bits must be set in the first Data Descriptor
         * and are ignored in the other ones:
         *   - IXGBE_ADVTXD_DCMD_IFCS
         *   - IXGBE_ADVTXD_MAC_1588
         *   - IXGBE_ADVTXD_DCMD_VLE
         *
         * The following bits must only be set in the last Data
         * Descriptor:
         *   - IXGBE_TXD_CMD_EOP
         *
         * The following bits can be set in any Data Descriptor, but
         * are only set in the last Data Descriptor:
         *   - IXGBE_TXD_CMD_RS
         */
        cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
            IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
        olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
#ifdef RTE_LIBRTE_IEEE1588
        if (ol_flags & PKT_TX_IEEE1588_TMST)
            cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif
        if (tx_ol_req) {
            /*
             * Setup the TX Advanced Context Descriptor if required.
             */
            if (new_ctx) {
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd;

                ctx_txd = (volatile struct
                    ixgbe_adv_tx_context_desc *)
                    &txr[tx_id];

                txn = &sw_ring[txe->next_id];
                RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);

                if (txe->mbuf != NULL) {
                    rte_pktmbuf_free_seg(txe->mbuf);
                    txe->mbuf = NULL;
                }

                ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
                    vlan_macip_lens);

                txe->last_id = tx_last;
                tx_id = txe->next_id;
                txe = txn;
            }

            /*
             * Setup the TX Advanced Data Descriptor. This path is
             * taken whether the context descriptor was newly built
             * or an existing one is being reused.
             */
            cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
            olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
            olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
        }
        m_seg = tx_pkt;
        do {
            txd = &txr[tx_id];
            txn = &sw_ring[txe->next_id];

            if (txe->mbuf != NULL)
                rte_pktmbuf_free_seg(txe->mbuf);
            txe->mbuf = m_seg;

            /*
             * Set up Transmit Data Descriptor.
             */
            slen = m_seg->pkt.data_len;
            buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
            txd->read.buffer_addr =
                rte_cpu_to_le_64(buf_dma_addr);
            txd->read.cmd_type_len =
                rte_cpu_to_le_32(cmd_type_len | slen);
            txd->read.olinfo_status =
                rte_cpu_to_le_32(olinfo_status);
            txe->last_id = tx_last;
            tx_id = txe->next_id;
            txe = txn;
            m_seg = m_seg->pkt.next;
        } while (m_seg != NULL);

        /*
         * The last packet data descriptor needs End Of Packet (EOP).
         */
        cmd_type_len |= IXGBE_TXD_CMD_EOP;
        txq->nb_tx_used += nb_used;
        txq->nb_tx_free -= nb_used;
        /* Set RS bit only on threshold packets' last descriptor */
        if (txq->nb_tx_used >= txq->tx_rs_thresh) {
            PMD_TX_FREE_LOG(DEBUG,
                    "Setting RS bit on TXD id="
                    "%4u (port=%d queue=%d)",
                    tx_last, txq->port_id, txq->queue_id);

            cmd_type_len |= IXGBE_TXD_CMD_RS;

            /* Update txq RS bit counters */
            txq->nb_tx_used = 0;
        }
        txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
    }

end_of_tx:
    rte_wmb();

    /*
     * Set the Transmit Descriptor Tail (TDT).
     */
    PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
           (unsigned) txq->port_id, (unsigned) txq->queue_id,
           (unsigned) tx_id, (unsigned) nb_tx);
    IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
    txq->tx_tail = tx_id;

    return (nb_tx);
}
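/*
 * Usage sketch (editor's addition): applications never call
 * ixgbe_xmit_pkts() directly; it is installed as dev->tx_pkt_burst in
 * ixgbe_dev_tx_queue_setup() and reached through the ethdev API, e.g.:
 *
 *    uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * where pkts is an array of nb_pkts mbuf pointers; sent may be smaller
 * than nb_pkts if the ring runs out of free descriptors.
 */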
/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
static inline uint16_t
rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
{
    uint16_t pkt_flags;

    static uint16_t ip_pkt_types_map[16] = {
        0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
        PKT_RX_IPV6_HDR, 0, 0, 0,
        PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
        PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
    };

    static uint16_t ip_rss_types_map[16] = {
        0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
        0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
        PKT_RX_RSS_HASH, 0, 0, 0,
        0, 0, 0, PKT_RX_FDIR,
    };

#ifdef RTE_LIBRTE_IEEE1588
    static uint32_t ip_pkt_etqf_map[8] = {
        0, 0, 0, PKT_RX_IEEE1588_PTP,
        0, 0, 0, 0,
    };

    pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
                ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
                ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#else
    pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
                ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#endif
    return (pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
}
static inline uint16_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
    uint16_t pkt_flags;

    /*
     * Check only whether a VLAN is present.
     * Do not check here whether the L3/L4 RX checksum was computed by
     * the NIC; that is indicated by the rte_eth_rxmode.hw_ip_checksum
     * flag instead.
     */
    pkt_flags = (uint16_t) (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;

#ifdef RTE_LIBRTE_IEEE1588
    if (rx_status & IXGBE_RXD_STAT_TMST)
        pkt_flags = (pkt_flags | PKT_RX_IEEE1588_TMST);
#endif
    return pkt_flags;
}

static inline uint16_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
    /*
     * Bit 31: IPE, IPv4 checksum error
     * Bit 30: L4I, L4 integrity error
     */
    static uint16_t error_to_pkt_flags_map[4] = {
        0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
        PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
    };
    return error_to_pkt_flags_map[(rx_status >>
        IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
}
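/*
 * Worked example (editor's note): shifting rx_status right by
 * IXGBE_RXDADV_ERR_CKSUM_BIT and masking with IXGBE_RXDADV_ERR_CKSUM_MSK
 * yields a 2-bit index built from the L4I (bit 30) and IPE (bit 31)
 * error bits. Index 0 reports no error flags, index 1 maps to
 * PKT_RX_L4_CKSUM_BAD, index 2 to PKT_RX_IP_CKSUM_BAD, and index 3
 * (both bits set) to the OR of the two.
 */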
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
/*
 * LOOK_AHEAD defines how many desc statuses to check beyond the
 * current descriptor.
 * It must be a pound define for optimal performance.
 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
 * function only works with LOOK_AHEAD=8.
 */
#define LOOK_AHEAD 8
#if (LOOK_AHEAD != 8)
#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
#endif

static inline int
ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
{
    volatile union ixgbe_adv_rx_desc *rxdp;
    struct igb_rx_entry *rxep;
    struct rte_mbuf *mb;
    uint16_t pkt_len;
    int s[LOOK_AHEAD], nb_dd;
    int i, j, nb_rx = 0;

    /* get references to current descriptor and S/W ring entry */
    rxdp = &rxq->rx_ring[rxq->rx_tail];
    rxep = &rxq->sw_ring[rxq->rx_tail];

    /* check to make sure there is at least 1 packet to receive */
    if (! (rxdp->wb.upper.status_error & IXGBE_RXDADV_STAT_DD))
        return 0;

    /*
     * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
     * reference packets that are ready to be received.
     */
    for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
         i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
        /* Read desc statuses backwards to avoid race condition */
        for (j = LOOK_AHEAD-1; j >= 0; --j)
            s[j] = rxdp[j].wb.upper.status_error;

        /* Clear everything but the status bits (LSB) */
        for (j = 0; j < LOOK_AHEAD; ++j)
            s[j] &= IXGBE_RXDADV_STAT_DD;

        /* Compute how many status bits were set */
        nb_dd = s[0]+s[1]+s[2]+s[3]+s[4]+s[5]+s[6]+s[7];
        nb_rx += nb_dd;

        /* Translate descriptor info to mbuf format */
        for (j = 0; j < nb_dd; ++j) {
            mb = rxep[j].mbuf;
            pkt_len = rxdp[j].wb.upper.length - rxq->crc_len;
            mb->pkt.data_len = pkt_len;
            mb->pkt.pkt_len = pkt_len;
            mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan;
            mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss;

            /* convert descriptor fields to rte mbuf flags */
            mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags(
                    rxdp[j].wb.lower.lo_dword.data);
            /* reuse status field from scan list */
            mb->ol_flags |= rx_desc_status_to_pkt_flags(s[j]);
            mb->ol_flags |= rx_desc_error_to_pkt_flags(s[j]);
        }

        /* Move mbuf pointers from the S/W ring to the stage */
        for (j = 0; j < LOOK_AHEAD; ++j) {
            rxq->rx_stage[i + j] = rxep[j].mbuf;
        }

        /* stop if all requested packets could not be received */
        if (nb_dd != LOOK_AHEAD)
            break;
    }

    /* clear software ring entries so we can cleanup correctly */
    for (i = 0; i < nb_rx; ++i)
        rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;

    return nb_rx;
}
static inline int
ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq)
{
    volatile union ixgbe_adv_rx_desc *rxdp;
    struct igb_rx_entry *rxep;
    struct rte_mbuf *mb;
    uint16_t alloc_idx;
    uint64_t dma_addr;
    int diag, i;

    /* allocate buffers in bulk directly into the S/W ring */
    alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
    rxep = &rxq->sw_ring[alloc_idx];
    diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
                    rxq->rx_free_thresh);
    if (unlikely(diag != 0))
        return (-ENOMEM);

    rxdp = &rxq->rx_ring[alloc_idx];
    for (i = 0; i < rxq->rx_free_thresh; ++i) {
        /* populate the static rte mbuf fields */
        mb = rxep[i].mbuf;
        rte_mbuf_refcnt_set(mb, 1);
        mb->type = RTE_MBUF_PKT;
        mb->pkt.next = NULL;
        mb->pkt.data = (char *)mb->buf_addr + RTE_PKTMBUF_HEADROOM;
        mb->pkt.nb_segs = 1;
        mb->pkt.in_port = rxq->port_id;

        /* populate the descriptors */
        dma_addr = (uint64_t)mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
        rxdp[i].read.hdr_addr = dma_addr;
        rxdp[i].read.pkt_addr = dma_addr;
    }

    /* update tail pointer */
    rte_wmb();
    IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rxq->rx_free_trigger);

    /* update state of internal queue structure */
    rxq->rx_free_trigger += rxq->rx_free_thresh;
    if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
        rxq->rx_free_trigger = (rxq->rx_free_thresh - 1);

    /* no errors */
    return 0;
}
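/*
 * Worked example (editor's note): with nb_rx_desc = 128 and
 * rx_free_thresh = 32, rx_free_trigger starts at 31 (see
 * ixgbe_reset_rx_queue()), so buffers are refilled in blocks of 32 at
 * ring indexes 0..31, 32..63, 64..95 and 96..127; after the last block
 * the trigger wraps back to 31 for the next pass around the ring.
 */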
static inline uint16_t
ixgbe_rx_fill_from_stage(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
             uint16_t nb_pkts)
{
    struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
    int i;

    /* how many packets are ready to return? */
    nb_pkts = RTE_MIN(nb_pkts, rxq->rx_nb_avail);

    /* copy mbuf pointers to the application's packet list */
    for (i = 0; i < nb_pkts; ++i)
        rx_pkts[i] = stage[i];

    /* update internal queue state */
    rxq->rx_nb_avail -= nb_pkts;
    rxq->rx_next_avail += nb_pkts;

    return nb_pkts;
}
static inline uint16_t
rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
         uint16_t nb_pkts)
{
    struct igb_rx_queue *rxq = (struct igb_rx_queue *)rx_queue;
    uint16_t nb_rx = 0;

    /* Any previously recv'd pkts will be returned from the Rx stage */
    if (rxq->rx_nb_avail)
        return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

    /* Scan the H/W ring for packets to receive */
    nb_rx = ixgbe_rx_scan_hw_ring(rxq);

    /* update internal queue state */
    rxq->rx_next_avail = 0;
    rxq->rx_nb_avail = nb_rx;
    rxq->rx_tail += nb_rx;

    /* if required, allocate new buffers to replenish descriptors */
    if (rxq->rx_tail > rxq->rx_free_trigger) {
        if (ixgbe_rx_alloc_bufs(rxq) != 0) {
            uint16_t i, j;

            PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
                   "queue_id=%u\n", (unsigned) rxq->port_id,
                   (unsigned) rxq->queue_id);

            rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
                rxq->rx_free_thresh;

            /*
             * Need to rewind any previous receives if we cannot
             * allocate new buffers to replenish the old ones.
             */
            rxq->rx_nb_avail = 0;
            rxq->rx_tail -= nb_rx;
            for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
                rxq->sw_ring[j].mbuf = rxq->rx_stage[i];

            return 0;
        }
    }

    if (rxq->rx_tail >= rxq->nb_rx_desc)
        rxq->rx_tail = 0;

    /* received any packets this loop? */
    if (rxq->rx_nb_avail)
        return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

    return 0;
}

/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
uint16_t
ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
               uint16_t nb_pkts)
{
    uint16_t nb_rx = 0;

    if (unlikely(nb_pkts == 0))
        return 0;

    if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
        return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);

    /* request is relatively large, chunk it up */
    while (nb_pkts) {
        uint16_t ret, n;

        n = RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
        ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
        nb_rx = (uint16_t)(nb_rx + ret);
        nb_pkts = (uint16_t)(nb_pkts - ret);
        if (ret < n)
            break;
    }

    return nb_rx;
}
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */
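/*
 * Worked example (editor's note): a call asking for 100 packets is split
 * by ixgbe_recv_pkts_bulk_alloc() into sub-requests of 32, 32, 32 and 4
 * (RTE_PMD_IXGBE_RX_MAX_BURST = 32); the loop stops early as soon as one
 * of the sub-requests returns fewer packets than it asked for.
 */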
uint16_t
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
        uint16_t nb_pkts)
{
    struct igb_rx_queue *rxq;
    volatile union ixgbe_adv_rx_desc *rx_ring;
    volatile union ixgbe_adv_rx_desc *rxdp;
    struct igb_rx_entry *sw_ring;
    struct igb_rx_entry *rxe;
    struct rte_mbuf *rxm;
    struct rte_mbuf *nmb;
    union ixgbe_adv_rx_desc rxd;
    uint64_t dma_addr;
    uint32_t staterr;
    uint32_t hlen_type_rss;
    uint16_t pkt_len, pkt_flags;
    uint16_t rx_id, nb_rx, nb_hold;

    nb_rx = 0;
    nb_hold = 0;
    rxq = rx_queue;
    rx_id = rxq->rx_tail;
    rx_ring = rxq->rx_ring;
    sw_ring = rxq->sw_ring;
    while (nb_rx < nb_pkts) {
        /*
         * The order of operations here is important as the DD status
         * bit must not be read after any other descriptor fields.
         * rx_ring and rxdp are pointing to volatile data so the order
         * of accesses cannot be reordered by the compiler. If they were
         * not volatile, they could be reordered which could lead to
         * using invalid descriptor fields when read from rxd.
         */
        rxdp = &rx_ring[rx_id];
        staterr = rxdp->wb.upper.status_error;
        if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
            break;
        rxd = *rxdp;

        /*
         * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
         * is likely to be invalid and to be dropped by the various
         * validation checks performed by the network stack.
         *
         * Allocate a new mbuf to replenish the RX ring descriptor.
         * If the allocation fails:
         *    - arrange for that RX descriptor to be the first one
         *      being parsed the next time the receive function is
         *      invoked [on the same queue].
         *
         *    - Stop parsing the RX ring and return immediately.
         *
         * This policy does not drop the packet received in the RX
         * descriptor for which the allocation of a new mbuf failed.
         * Thus, it allows that packet to be retrieved later, once
         * mbufs have been freed in the mean time.
         * As a side effect, holding RX descriptors instead of
         * systematically giving them back to the NIC may lead to
         * RX ring exhaustion situations.
         * However, the NIC can gracefully prevent such situations
         * from happening by sending specific "back-pressure" flow
         * control frames to its peer(s).
         */
        PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
               "ext_err_stat=0x%08x pkt_len=%u\n",
               (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
               (unsigned) rx_id, (unsigned) staterr,
               (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

        nmb = rte_rxmbuf_alloc(rxq->mb_pool);
        if (nmb == NULL) {
            PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
                   "queue_id=%u\n", (unsigned) rxq->port_id,
                   (unsigned) rxq->queue_id);
            rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
            break;
        }

        nb_hold++;
        rxe = &sw_ring[rx_id];
        rx_id++;
        if (rx_id == rxq->nb_rx_desc)
            rx_id = 0;

        /* Prefetch next mbuf while processing current one. */
        rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

        /*
         * When next RX descriptor is on a cache-line boundary,
         * prefetch the next 4 RX descriptors and the next 8 pointers
         * to mbufs.
         */
        if ((rx_id & 0x3) == 0) {
            rte_ixgbe_prefetch(&rx_ring[rx_id]);
            rte_ixgbe_prefetch(&sw_ring[rx_id]);
        }

        rxm = rxe->mbuf;
        rxe->mbuf = nmb;
        dma_addr =
            rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
        rxdp->read.hdr_addr = dma_addr;
        rxdp->read.pkt_addr = dma_addr;

        /*
         * Initialize the returned mbuf.
         * 1) setup generic mbuf fields:
         *    - number of segments,
         *    - next segment,
         *    - packet length,
         *    - RX port identifier.
         * 2) integrate hardware offload data, if any:
         *    - RSS flag & hash,
         *    - IP checksum flag,
         *    - VLAN TCI, if any,
         *    - error flags.
         */
        pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
                      rxq->crc_len);
        rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
        rte_packet_prefetch(rxm->pkt.data);
        rxm->pkt.nb_segs = 1;
        rxm->pkt.next = NULL;
        rxm->pkt.pkt_len = pkt_len;
        rxm->pkt.data_len = pkt_len;
        rxm->pkt.in_port = rxq->port_id;

        hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
        /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
        rxm->pkt.vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);

        pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
        pkt_flags = (pkt_flags | rx_desc_status_to_pkt_flags(staterr));
        pkt_flags = (pkt_flags | rx_desc_error_to_pkt_flags(staterr));
        rxm->ol_flags = pkt_flags;

        if (likely(pkt_flags & PKT_RX_RSS_HASH))
            rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
        else if (pkt_flags & PKT_RX_FDIR) {
            rxm->pkt.hash.fdir.hash =
                (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
                       & IXGBE_ATR_HASH_MASK);
            rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
        }

        /*
         * Store the mbuf address into the next entry of the array
         * of returned packets.
         */
        rx_pkts[nb_rx++] = rxm;
    }
    rxq->rx_tail = rx_id;

    /*
     * If the number of free RX descriptors is greater than the RX free
     * threshold of the queue, advance the Receive Descriptor Tail (RDT)
     * register.
     * Update the RDT with the value of the last processed RX descriptor
     * minus 1, to guarantee that the RDT register is never equal to the
     * RDH register, which creates a "full" ring situation from the
     * hardware point of view...
     */
    nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
    if (nb_hold > rxq->rx_free_thresh) {
        PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
               "nb_hold=%u nb_rx=%u\n",
               (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
               (unsigned) rx_id, (unsigned) nb_hold,
               (unsigned) nb_rx);
        rx_id = (uint16_t) ((rx_id == 0) ?
                 (rxq->nb_rx_desc - 1) : (rx_id - 1));
        IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
        nb_hold = 0;
    }
    rxq->nb_rx_hold = nb_hold;
    return (nb_rx);
}
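/*
 * Worked example (editor's note): with nb_rx_desc = 128 and
 * rx_free_thresh = 32, once more than 32 descriptors are being held the
 * driver writes RDT = rx_id - 1 (or 127 when rx_id is 0). Keeping RDT one
 * entry behind the next descriptor to be processed ensures RDT never
 * equals RDH, which the hardware would interpret as a full ring.
 */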
uint16_t
ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
              uint16_t nb_pkts)
{
    struct igb_rx_queue *rxq;
    volatile union ixgbe_adv_rx_desc *rx_ring;
    volatile union ixgbe_adv_rx_desc *rxdp;
    struct igb_rx_entry *sw_ring;
    struct igb_rx_entry *rxe;
    struct rte_mbuf *first_seg;
    struct rte_mbuf *last_seg;
    struct rte_mbuf *rxm;
    struct rte_mbuf *nmb;
    union ixgbe_adv_rx_desc rxd;
    uint64_t dma; /* Physical address of mbuf data buffer */
    uint32_t staterr;
    uint32_t hlen_type_rss;
    uint16_t rx_id, nb_rx, nb_hold;
    uint16_t data_len, pkt_flags;

    nb_rx = 0;
    nb_hold = 0;
    rxq = rx_queue;
    rx_id = rxq->rx_tail;
    rx_ring = rxq->rx_ring;
    sw_ring = rxq->sw_ring;

    /*
     * Retrieve RX context of current packet, if any.
     */
    first_seg = rxq->pkt_first_seg;
    last_seg = rxq->pkt_last_seg;
    while (nb_rx < nb_pkts) {
    next_desc:
        /*
         * The order of operations here is important as the DD status
         * bit must not be read after any other descriptor fields.
         * rx_ring and rxdp are pointing to volatile data so the order
         * of accesses cannot be reordered by the compiler. If they were
         * not volatile, they could be reordered which could lead to
         * using invalid descriptor fields when read from rxd.
         */
        rxdp = &rx_ring[rx_id];
        staterr = rxdp->wb.upper.status_error;
        if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
            break;
        rxd = *rxdp;

        /*
         * Allocate a new mbuf to replenish the RX ring descriptor.
         * If the allocation fails:
         *    - arrange for that RX descriptor to be the first one
         *      being parsed the next time the receive function is
         *      invoked [on the same queue].
         *
         *    - Stop parsing the RX ring and return immediately.
         *
         * This policy does not drop the packet received in the RX
         * descriptor for which the allocation of a new mbuf failed.
         * Thus, it allows that packet to be retrieved later, once
         * mbufs have been freed in the mean time.
         * As a side effect, holding RX descriptors instead of
         * systematically giving them back to the NIC may lead to
         * RX ring exhaustion situations.
         * However, the NIC can gracefully prevent such situations
         * from happening by sending specific "back-pressure" flow
         * control frames to its peer(s).
         */
        PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
               "staterr=0x%x data_len=%u\n",
               (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
               (unsigned) rx_id, (unsigned) staterr,
               (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

        nmb = rte_rxmbuf_alloc(rxq->mb_pool);
        if (nmb == NULL) {
            PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
                   "queue_id=%u\n", (unsigned) rxq->port_id,
                   (unsigned) rxq->queue_id);
            rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
            break;
        }

        nb_hold++;
        rxe = &sw_ring[rx_id];
        rx_id++;
        if (rx_id == rxq->nb_rx_desc)
            rx_id = 0;

        /* Prefetch next mbuf while processing current one. */
        rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

        /*
         * When next RX descriptor is on a cache-line boundary,
         * prefetch the next 4 RX descriptors and the next 8 pointers
         * to mbufs.
         */
        if ((rx_id & 0x3) == 0) {
            rte_ixgbe_prefetch(&rx_ring[rx_id]);
            rte_ixgbe_prefetch(&sw_ring[rx_id]);
        }

        /*
         * Update RX descriptor with the physical address of the new
         * data buffer of the newly allocated mbuf.
         */
        rxm = rxe->mbuf;
        rxe->mbuf = nmb;
        dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
        rxdp->read.hdr_addr = dma;
        rxdp->read.pkt_addr = dma;

        /*
         * Set data length & data buffer address of mbuf.
         */
        data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
        rxm->pkt.data_len = data_len;
        rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

        /*
         * If this is the first buffer of the received packet,
         * set the pointer to the first mbuf of the packet and
         * initialize its context.
         * Otherwise, update the total length and the number of segments
         * of the current scattered packet, and update the pointer to
         * the last mbuf of the current packet.
         */
        if (first_seg == NULL) {
            first_seg = rxm;
            first_seg->pkt.pkt_len = data_len;
            first_seg->pkt.nb_segs = 1;
        } else {
            first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
                    + data_len);
            first_seg->pkt.nb_segs++;
            last_seg->pkt.next = rxm;
        }
        /*
         * If this is not the last buffer of the received packet,
         * update the pointer to the last mbuf of the current scattered
         * packet and continue to parse the RX ring.
         */
        if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
            last_seg = rxm;
            goto next_desc;
        }

        /*
         * This is the last buffer of the received packet.
         * If the CRC is not stripped by the hardware:
         *   - Subtract the CRC length from the total packet length.
         *   - If the last buffer only contains the whole CRC or a part
         *     of it, free the mbuf associated to the last buffer.
         *     If part of the CRC is also contained in the previous
         *     mbuf, subtract the length of that CRC part from the
         *     data length of the previous mbuf.
         */
        rxm->pkt.next = NULL;
        if (unlikely(rxq->crc_len > 0)) {
            first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
            if (data_len <= ETHER_CRC_LEN) {
                rte_pktmbuf_free_seg(rxm);
                first_seg->pkt.nb_segs--;
                last_seg->pkt.data_len = (uint16_t)
                    (last_seg->pkt.data_len -
                     (ETHER_CRC_LEN - data_len));
                last_seg->pkt.next = NULL;
            } else
                rxm->pkt.data_len =
                    (uint16_t) (data_len - ETHER_CRC_LEN);
        }
        /*
         * Initialize the first mbuf of the returned packet:
         *    - RX port identifier,
         *    - hardware offload data, if any:
         *      - RSS flag & hash,
         *      - IP checksum flag,
         *      - VLAN TCI, if any,
         *      - error flags.
         */
        first_seg->pkt.in_port = rxq->port_id;

        /*
         * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
         * set in the pkt_flags field.
         */
        first_seg->pkt.vlan_tci =
            rte_le_to_cpu_16(rxd.wb.upper.vlan);
        hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
        pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
        pkt_flags = (pkt_flags |
                rx_desc_status_to_pkt_flags(staterr));
        pkt_flags = (pkt_flags |
                rx_desc_error_to_pkt_flags(staterr));
        first_seg->ol_flags = pkt_flags;

        if (likely(pkt_flags & PKT_RX_RSS_HASH))
            first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
        else if (pkt_flags & PKT_RX_FDIR) {
            first_seg->pkt.hash.fdir.hash =
                (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
                       & IXGBE_ATR_HASH_MASK);
            first_seg->pkt.hash.fdir.id =
                rxd.wb.lower.hi_dword.csum_ip.ip_id;
        }

        /* Prefetch data of first segment, if configured to do so. */
        rte_packet_prefetch(first_seg->pkt.data);

        /*
         * Store the mbuf address into the next entry of the array
         * of returned packets.
         */
        rx_pkts[nb_rx++] = first_seg;

        /*
         * Setup receipt context for a new packet.
         */
        first_seg = NULL;
    }

    /*
     * Record index of the next RX descriptor to probe.
     */
    rxq->rx_tail = rx_id;

    /*
     * Save receive context.
     */
    rxq->pkt_first_seg = first_seg;
    rxq->pkt_last_seg = last_seg;
    /*
     * If the number of free RX descriptors is greater than the RX free
     * threshold of the queue, advance the Receive Descriptor Tail (RDT)
     * register.
     * Update the RDT with the value of the last processed RX descriptor
     * minus 1, to guarantee that the RDT register is never equal to the
     * RDH register, which creates a "full" ring situation from the
     * hardware point of view...
     */
    nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
    if (nb_hold > rxq->rx_free_thresh) {
        PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
               "nb_hold=%u nb_rx=%u\n",
               (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
               (unsigned) rx_id, (unsigned) nb_hold,
               (unsigned) nb_rx);
        rx_id = (uint16_t) ((rx_id == 0) ?
                 (rxq->nb_rx_desc - 1) : (rx_id - 1));
        IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
        nb_hold = 0;
    }
    rxq->nb_rx_hold = nb_hold;
    return (nb_rx);
}
/*********************************************************************
 *
 *  Queue management functions
 *
 **********************************************************************/

/*
 * Rings setup and release.
 *
 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
 * also optimize cache line size effect. H/W supports up to cache line size 128.
 */
#define IXGBE_ALIGN 128

/*
 * Maximum number of Ring Descriptors.
 *
 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
 * descriptors should meet the following condition:
 *      (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
 */
#define IXGBE_MIN_RING_DESC 64
#define IXGBE_MAX_RING_DESC 4096

/*
 * Create memzone for HW rings. malloc can't be used as the physical address is
 * needed. If the memzone is already created, this function returns a pointer
 * to the existing one.
 */
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
              uint16_t queue_id, uint32_t ring_size, int socket_id)
{
    char z_name[RTE_MEMZONE_NAMESIZE];
    const struct rte_memzone *mz;

    rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
            dev->driver->pci_drv.name, ring_name,
            dev->data->port_id, queue_id);

    mz = rte_memzone_lookup(z_name);
    if (mz)
        return mz;

    return rte_memzone_reserve_aligned(z_name, (uint64_t) ring_size,
            socket_id, 0, IXGBE_ALIGN);
}
static void
ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
    unsigned i;

    if (txq->sw_ring != NULL) {
        for (i = 0; i < txq->nb_tx_desc; i++) {
            if (txq->sw_ring[i].mbuf != NULL) {
                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
                txq->sw_ring[i].mbuf = NULL;
            }
        }
    }
}

static void
ixgbe_tx_queue_release(struct igb_tx_queue *txq)
{
    if (txq != NULL) {
        ixgbe_tx_queue_release_mbufs(txq);
        rte_free(txq->sw_ring);
        rte_free(txq);
    }
}

void
ixgbe_dev_tx_queue_release(void *txq)
{
    ixgbe_tx_queue_release(txq);
}

/* (Re)set dynamic igb_tx_queue fields to defaults */
static void
ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
{
    struct igb_tx_entry *txe = txq->sw_ring;
    uint16_t prev, i;

    /* Zero out HW ring memory */
    for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
        ((volatile char *)txq->tx_ring)[i] = 0;
    }

    /* Initialize SW ring entries */
    prev = (uint16_t) (txq->nb_tx_desc - 1);
    for (i = 0; i < txq->nb_tx_desc; i++) {
        volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
        txd->wb.status = IXGBE_TXD_STAT_DD;
        txe[i].mbuf = NULL;
        txe[i].last_id = i;
        txe[prev].next_id = i;
        prev = i;
    }

    txq->tx_tail = 0;
    txq->nb_tx_used = 0;
    /*
     * Always allow 1 descriptor to be un-allocated to avoid
     * a H/W race condition.
     */
    txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
    txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
    txq->ctx_curr = 0;
    memset((void*)&txq->ctx_cache, 0,
        IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
}
int
ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
             uint16_t queue_idx,
             uint16_t nb_desc,
             unsigned int socket_id,
             const struct rte_eth_txconf *tx_conf)
{
    const struct rte_memzone *tz;
    struct igb_tx_queue *txq;
    struct ixgbe_hw *hw;
    uint16_t tx_rs_thresh, tx_free_thresh;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    /*
     * Validate number of transmit descriptors.
     * It must not exceed the hardware maximum, and must be a multiple
     * of IXGBE_ALIGN.
     */
    if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
        (nb_desc > IXGBE_MAX_RING_DESC) ||
        (nb_desc < IXGBE_MIN_RING_DESC)) {
        return -(EINVAL);
    }
    /*
     * The following two parameters control the setting of the RS bit on
     * transmit descriptors.
     * TX descriptors will have their RS bit set after txq->tx_rs_thresh
     * descriptors have been used.
     * The TX descriptor ring will be cleaned after txq->tx_free_thresh
     * descriptors are used or if the number of descriptors required
     * to transmit a packet is greater than the number of free TX
     * descriptors.
     * The following constraints must be satisfied:
     *  - tx_rs_thresh must be greater than 0.
     *  - tx_rs_thresh must be less than the size of the ring minus 2.
     *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
     *  - tx_free_thresh must be greater than 0.
     *  - tx_free_thresh must be less than the size of the ring minus 3.
     * One descriptor in the TX ring is used as a sentinel to avoid a
     * H/W race condition, hence the maximum threshold constraints.
     * When set to zero, the default values are used.
     */
    tx_rs_thresh = (tx_conf->tx_rs_thresh) ?
            tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH;
    tx_free_thresh = (tx_conf->tx_free_thresh) ?
            tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
    if (tx_rs_thresh >= (nb_desc - 2)) {
        RTE_LOG(ERR, PMD,
                "tx_rs_thresh must be less than the "
                "number of TX descriptors minus 2. "
                "(tx_rs_thresh=%u port=%d queue=%d)\n",
                tx_rs_thresh, dev->data->port_id, queue_idx);
        return -(EINVAL);
    }
    if (tx_free_thresh >= (nb_desc - 3)) {
        RTE_LOG(ERR, PMD,
                "tx_free_thresh must be less than the "
                "number of TX descriptors minus 3. "
                "(tx_free_thresh=%u port=%d queue=%d)\n",
                tx_free_thresh, dev->data->port_id, queue_idx);
        return -(EINVAL);
    }
    if (tx_rs_thresh > tx_free_thresh) {
        RTE_LOG(ERR, PMD,
                "tx_rs_thresh must be less than or equal to "
                "tx_free_thresh. "
                "(tx_free_thresh=%u tx_rs_thresh=%u "
                "port=%d queue=%d)\n",
                tx_free_thresh, tx_rs_thresh,
                dev->data->port_id, queue_idx);
        return -(EINVAL);
    }

    /*
     * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
     * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
     * by the NIC and all descriptors are written back after the NIC
     * accumulates WTHRESH descriptors.
     */
    if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
        RTE_LOG(INFO, PMD,
                "TX WTHRESH should be set to 0 if "
                "tx_rs_thresh is greater than 1. "
                "TX WTHRESH will be set to 0. "
                "(tx_rs_thresh=%u port=%d queue=%d)\n",
                tx_rs_thresh,
                dev->data->port_id, queue_idx);
    }
    /* Free memory prior to re-allocation if needed... */
    if (dev->data->tx_queues[queue_idx] != NULL)
        ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);

    /* First allocate the tx queue data structure */
    txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
              CACHE_LINE_SIZE);
    if (txq == NULL)
        return (-ENOMEM);

    /*
     * Allocate TX ring hardware descriptors. A memzone large enough to
     * handle the maximum ring size is allocated in order to allow for
     * resizing in later calls to the queue setup function.
     */
    tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
                   sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
                   socket_id);
    if (tz == NULL) {
        ixgbe_tx_queue_release(txq);
        return (-ENOMEM);
    }

    txq->nb_tx_desc = nb_desc;
    txq->tx_rs_thresh = tx_rs_thresh;
    txq->tx_free_thresh = tx_free_thresh;
    txq->pthresh = tx_conf->tx_thresh.pthresh;
    txq->hthresh = tx_conf->tx_thresh.hthresh;
    txq->wthresh = tx_conf->tx_thresh.wthresh;
    txq->queue_id = queue_idx;
    txq->port_id = dev->data->port_id;

    /*
     * Modification to set VFTDT for virtual function if vf is detected.
     */
    if (hw->mac.type == ixgbe_mac_82599_vf)
        txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
    else
        txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));

    txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
    txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;

    /* Allocate software ring */
    txq->sw_ring = rte_zmalloc("txq->sw_ring",
                   sizeof(struct igb_tx_entry) * nb_desc,
                   CACHE_LINE_SIZE);
    if (txq->sw_ring == NULL) {
        ixgbe_tx_queue_release(txq);
        return (-ENOMEM);
    }
    PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
             txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

    ixgbe_reset_tx_queue(txq);

    dev->data->tx_queues[queue_idx] = txq;

    dev->tx_pkt_burst = ixgbe_xmit_pkts;

    return (0);
}
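/*
 * Usage sketch (editor's addition): this setup routine is reached through
 * the ethdev API during application initialisation, e.g.:
 *
 *    struct rte_eth_txconf tx_conf = { .tx_rs_thresh = 32,
 *                                      .tx_free_thresh = 32 };
 *    rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &tx_conf);
 *
 * The threshold values shown are illustrative; zero values fall back to
 * the DEFAULT_TX_RS_THRESH / DEFAULT_TX_FREE_THRESH defaults above.
 */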
static void
ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
{
    unsigned i;

    if (rxq->sw_ring != NULL) {
        for (i = 0; i < rxq->nb_rx_desc; i++) {
            if (rxq->sw_ring[i].mbuf != NULL) {
                rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
                rxq->sw_ring[i].mbuf = NULL;
            }
        }
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
        if (rxq->rx_nb_avail) {
            for (i = 0; i < rxq->rx_nb_avail; ++i) {
                struct rte_mbuf *mb;
                mb = rxq->rx_stage[rxq->rx_next_avail + i];
                rte_pktmbuf_free_seg(mb);
            }
            rxq->rx_nb_avail = 0;
        }
#endif
    }
}

static void
ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
{
    if (rxq != NULL) {
        ixgbe_rx_queue_release_mbufs(rxq);
        rte_free(rxq->sw_ring);
        rte_free(rxq);
    }
}

void
ixgbe_dev_rx_queue_release(void *rxq)
{
    ixgbe_rx_queue_release(rxq);
}
/*
 * Check if the Rx Burst Bulk Alloc function can be used.
 * Return
 *    0:       the preconditions are satisfied and the bulk allocation
 *             function can be used.
 *    -EINVAL: the preconditions are NOT satisfied and the default Rx burst
 *             function must be used.
 */
static inline int
check_rx_burst_bulk_alloc_preconditions(struct igb_rx_queue *rxq)
{
    int ret = 0;

    /*
     * Make sure the following pre-conditions are satisfied:
     *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
     *   rxq->rx_free_thresh < rxq->nb_rx_desc
     *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
     *   rxq->nb_rx_desc < (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)
     * Scattered packets are not supported. This should be checked
     * outside of this function.
     */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    if (! (rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST))
        ret = -EINVAL;
    else if (! (rxq->rx_free_thresh < rxq->nb_rx_desc))
        ret = -EINVAL;
    else if (! ((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0))
        ret = -EINVAL;
    else if (! (rxq->nb_rx_desc <
        (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST)))
        ret = -EINVAL;
#else
    ret = -EINVAL;
#endif

    return ret;
}
/* (Re)set dynamic igb_rx_queue fields to defaults */
static void
ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
{
    unsigned i;
    uint16_t len;

    /*
     * By default, the Rx queue setup function allocates enough memory for
     * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
     * extra memory at the end of the descriptor ring to be zero'd out. A
     * pre-condition for using the Rx burst bulk alloc function is that the
     * number of descriptors is less than or equal to
     * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
     * constraints here to see if we need to zero out memory after the end
     * of the H/W descriptor ring.
     */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
        /* zero out extra memory */
        len = rxq->nb_rx_desc + RTE_PMD_IXGBE_RX_MAX_BURST;
    else
#endif
        /* do not zero out extra memory */
        len = rxq->nb_rx_desc;

    /*
     * Zero out HW ring memory. Zero out extra memory at the end of
     * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
     * reads extra memory as zeros.
     */
    for (i = 0; i < len * sizeof(union ixgbe_adv_rx_desc); i++) {
        ((volatile char *)rxq->rx_ring)[i] = 0;
    }

#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    /*
     * Initialize extra software ring entries. Space for these extra
     * entries is always allocated.
     */
    memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
    for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) {
        rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
    }

    rxq->rx_nb_avail = 0;
    rxq->rx_next_avail = 0;
    rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
#endif /* RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC */

    rxq->rx_tail = 0;
    rxq->nb_rx_hold = 0;
    rxq->pkt_first_seg = NULL;
    rxq->pkt_last_seg = NULL;
}
int
ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
             uint16_t queue_idx,
             uint16_t nb_desc,
             unsigned int socket_id,
             const struct rte_eth_rxconf *rx_conf,
             struct rte_mempool *mp)
{
    const struct rte_memzone *rz;
    struct igb_rx_queue *rxq;
    struct ixgbe_hw *hw;
    int use_def_burst_func = 1;
    uint16_t len;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    /*
     * Validate number of receive descriptors.
     * It must not exceed the hardware maximum, and must be a multiple
     * of IXGBE_ALIGN.
     */
    if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
        (nb_desc > IXGBE_MAX_RING_DESC) ||
        (nb_desc < IXGBE_MIN_RING_DESC)) {
        return -(EINVAL);
    }

    /* Free memory prior to re-allocation if needed... */
    if (dev->data->rx_queues[queue_idx] != NULL)
        ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);

    /* First allocate the rx queue data structure */
    rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
              CACHE_LINE_SIZE);
    if (rxq == NULL)
        return (-ENOMEM);
    rxq->mb_pool = mp;
    rxq->nb_rx_desc = nb_desc;
    rxq->rx_free_thresh = rx_conf->rx_free_thresh;
    rxq->queue_id = queue_idx;
    rxq->port_id = dev->data->port_id;
    rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
                  ETHER_CRC_LEN);

    /*
     * Allocate RX ring hardware descriptors. A memzone large enough to
     * handle the maximum ring size is allocated in order to allow for
     * resizing in later calls to the queue setup function.
     */
    rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
                   IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
                   socket_id);
    if (rz == NULL) {
        ixgbe_rx_queue_release(rxq);
        return (-ENOMEM);
    }

    /*
     * Modified to setup VFRDT for Virtual Function.
     */
    if (hw->mac.type == ixgbe_mac_82599_vf)
        rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
    else
        rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));

    rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
    rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;

    /*
     * Allocate software ring. Allow for space at the end of the
     * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
     * function does not access an invalid memory region.
     */
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
    len = nb_desc + RTE_PMD_IXGBE_RX_MAX_BURST;
#else
    len = nb_desc;
#endif
    rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
                   sizeof(struct igb_rx_entry) * len,
                   CACHE_LINE_SIZE);
    if (rxq->sw_ring == NULL) {
        ixgbe_rx_queue_release(rxq);
        return (-ENOMEM);
    }
    PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
             rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);

    /*
     * Certain constraints must be met in order to use the bulk buffer
     * allocation Rx burst function.
     */
    use_def_burst_func = check_rx_burst_bulk_alloc_preconditions(rxq);

    /* Check if pre-conditions are satisfied, and no Scattered Rx */
    if (!use_def_burst_func && !dev->data->scattered_rx) {
#ifdef RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC
        PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                 "satisfied. Rx Burst Bulk Alloc function will be "
                 "used on port=%d, queue=%d.\n",
                 rxq->port_id, rxq->queue_id);
        dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
#endif
    } else {
        PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions "
                 "are not satisfied, Scattered Rx is requested, "
                 "or RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is not "
                 "enabled (port=%d, queue=%d).\n",
                 rxq->port_id, rxq->queue_id);
    }

    dev->data->rx_queues[queue_idx] = rxq;

    ixgbe_reset_rx_queue(rxq);

    return 0;
}
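/*
 * Usage sketch (editor's addition): the RX side is configured the same way
 * through the ethdev API, e.g.:
 *
 *    struct rte_eth_rxconf rx_conf = { .rx_free_thresh = 32 };
 *    rte_eth_rx_queue_setup(port_id, 0, 128, rte_socket_id(),
 *                           &rx_conf, mbuf_pool);
 *
 * With 128 descriptors and rx_free_thresh = 32, the bulk-allocation
 * preconditions checked above are satisfied, so the bulk-alloc receive
 * path is selected when RTE_LIBRTE_IXGBE_RX_ALLOW_BULK_ALLOC is enabled.
 */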
void
ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
{
    unsigned i;

    PMD_INIT_FUNC_TRACE();

    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        struct igb_tx_queue *txq = dev->data->tx_queues[i];
        ixgbe_tx_queue_release_mbufs(txq);
        ixgbe_reset_tx_queue(txq);
    }

    for (i = 0; i < dev->data->nb_rx_queues; i++) {
        struct igb_rx_queue *rxq = dev->data->rx_queues[i];
        ixgbe_rx_queue_release_mbufs(rxq);
        ixgbe_reset_rx_queue(rxq);
    }
}

/*********************************************************************
 *
 *  Device RX/TX init functions
 *
 **********************************************************************/
/*
 * Receive Side Scaling (RSS)
 * See section 7.1.2.8 in the following document:
 *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
 *
 * Principles:
 * The source and destination IP addresses of the IP header and the source
 * and destination ports of TCP/UDP headers, if any, of received packets are
 * hashed against a configurable random key to compute a 32-bit RSS hash result.
 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
 * RSS output index, which is used as the RX queue index in which to store the
 * received packet.
 * The following output is supplied in the RX write-back descriptor:
 *     - 32-bit result of the Microsoft RSS hash function,
 *     - 4-bit RSS type field.
 */

/*
 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
 * Used as the default key.
 */
static uint8_t rss_intel_key[40] = {
    0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
    0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
    0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
    0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
    0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
static void
ixgbe_rss_disable(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    uint32_t mrqc;

    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
    mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
    mrqc &= ~IXGBE_MRQC_RSSEN;
    IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
}
static void
ixgbe_rss_configure(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    uint8_t *hash_key;
    uint32_t rss_key;
    uint32_t mrqc;
    uint32_t reta;
    uint16_t rss_hf;
    uint16_t i;
    uint16_t j;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
    if (rss_hf == 0) { /* Disable RSS */
        ixgbe_rss_disable(dev);
        return;
    }
    hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
    if (hash_key == NULL)
        hash_key = rss_intel_key; /* Default hash key */

    /* Fill in RSS hash key */
    for (i = 0; i < 10; i++) {
        rss_key  = hash_key[(i * 4)];
        rss_key |= hash_key[(i * 4) + 1] << 8;
        rss_key |= hash_key[(i * 4) + 2] << 16;
        rss_key |= hash_key[(i * 4) + 3] << 24;
        IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
    }

    /* Fill in redirection table */
    reta = 0;
    for (i = 0, j = 0; i < 128; i++, j++) {
        if (j == dev->data->nb_rx_queues)
            j = 0;
        reta = (reta << 8) | j;
        if ((i & 3) == 3)
            IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2),
                    rte_bswap32(reta));
    }
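    /*
     * Worked example (editor's note): the redirection table holds 128
     * entries packed four per 32-bit RETA register. With nb_rx_queues = 4,
     * j cycles through 0,1,2,3, so RETA(0) receives entries 0..3, RETA(1)
     * entries 4..7, and so on; rte_bswap32() places the first of each
     * group of four in the least-significant byte, as the hardware
     * expects.
     */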
2060 /* Set configured hashing functions in MRQC register */
2061 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
2062 if (rss_hf & ETH_RSS_IPV4)
2063 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2064 if (rss_hf & ETH_RSS_IPV4_TCP)
2065 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2066 if (rss_hf & ETH_RSS_IPV6)
2067 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2068 if (rss_hf & ETH_RSS_IPV6_EX)
2069 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2070 if (rss_hf & ETH_RSS_IPV6_TCP)
2071 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2072 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2073 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2074 if (rss_hf & ETH_RSS_IPV4_UDP)
2075 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2076 if (rss_hf & ETH_RSS_IPV6_UDP)
2077 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2078 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2079 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2080 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
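/*
 * For reference, a minimal application-side configuration that exercises the
 * RSS path above could look like the sketch below. The rss_conf field names
 * match the accesses in ixgbe_rss_configure(); the ETH_RSS mq_mode value, the
 * port id and the queue counts are assumptions for illustration. Leaving
 * rss_key NULL selects the default rss_intel_key.
 *
 *	struct rte_eth_conf port_conf = {
 *		.rxmode = { .mq_mode = ETH_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,
 *			.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP,
 *		},
 *	};
 *	rte_eth_dev_configure(port_id, 4, 4, &port_conf);
 */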
2083 #define NUM_VFTA_REGISTERS 128
2084 #define NIC_RX_BUFFER_SIZE 0x200
2087 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2089 struct rte_eth_vmdq_dcb_conf *cfg;
2090 struct ixgbe_hw *hw;
2091 enum rte_eth_nb_pools num_pools;
2092 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2094 uint8_t nb_tcs; /* number of traffic classes */
2097 PMD_INIT_FUNC_TRACE();
2098 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2099 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2100 num_pools = cfg->nb_queue_pools;
2101 /* Check we have a valid number of pools */
2102 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2103 ixgbe_rss_disable(dev);
2106 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2107 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2111 * split rx buffer up into sections, each for 1 traffic class
2113 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
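/*
 * Worked example (assuming NIC_RX_BUFFER_SIZE is expressed in KB, i.e. a
 * 512 KB receive packet buffer): with ETH_16_POOLS there are 8 TCs, so each
 * TC is given 0x200 / 8 = 0x40, i.e. 64 KB of packet buffer.
 */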
2114 for (i = 0 ; i < nb_tcs; i++) {
2115 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2116 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
2118 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
2119 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2121 /* allocate zero buffer space to all unused TCs */
2122 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2123 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
2124 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
2126 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
2129 /* MRQC: enable vmdq and dcb */
2130 mrqc = (num_pools == ETH_16_POOLS) ?
2131 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
2132 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2134 /* PFVTCTL: turn on virtualisation and set the default pool */
2135 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2136 if (cfg->enable_default_pool) {
2137 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
2139 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
2141 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
2143 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
2145 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2147 * mapping is done with 3 bits per priority,
2148 * so shift by i*3 each time
2150 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
2152 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
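/*
 * Worked example (hypothetical mapping): with dcb_queue[] =
 * {0, 1, 2, 3, 0, 1, 2, 3}, the 3-bit fields pack into queue_mapping =
 * 0x688688, i.e. user priorities 0-7 are spread round-robin over TCs 0-3.
 */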
2154 /* RTRPCS: DCB related */
2155 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
2157 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
2158 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2159 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
2160 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
2162 /* VFTA - enable all vlan filters */
2163 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
2164 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
2167 /* VFRE: pool enabling for receive - 16 or 32 */
2168 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
2169 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2172 * MPSAR - allow pools to read specific mac addresses
2173 * In this case, all pools should be able to read from mac addr 0
2175 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
2176 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
2178 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
2179 for (i = 0; i < cfg->nb_pool_maps; i++) {
2180 /* set vlan id in VF register and set the valid bit */
2181 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
2182 (cfg->pool_map[i].vlan_id & 0xFFF)));
2184 * Put the allowed pools in VFB reg. As we only have 16 or 32
2185 * pools, we only need to use the first half of the register
2188 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
2193 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2195 struct igb_rx_entry *rxe = rxq->sw_ring;
2199 /* Initialize software ring entries */
2200 for (i = 0; i < rxq->nb_rx_desc; i++) {
2201 volatile union ixgbe_adv_rx_desc *rxd;
2202 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
2204 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
2205 (unsigned) rxq->queue_id);
2209 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
2210 rxd = &rxq->rx_ring[i];
2211 rxd->read.hdr_addr = dma_addr;
2212 rxd->read.pkt_addr = dma_addr;
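/*
 * Note: the header and packet address fields of each advanced RX descriptor
 * are programmed with the same buffer address, so without header split the
 * NIC writes the whole frame into the single mbuf data buffer (the DEFAULT
 * DMA address macro already skips the mbuf headroom).
 */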
2220 * Initializes Receive Unit.
2223 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
2225 struct ixgbe_hw *hw;
2226 struct igb_rx_queue *rxq;
2227 struct rte_pktmbuf_pool_private *mbp_priv;
2240 PMD_INIT_FUNC_TRACE();
2241 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2244 * Make sure receives are disabled while setting
2245 * up the RX context (registers, descriptor rings, etc.).
2247 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2248 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2250 /* Enable receipt of broadcast frames, discard pause frames and pass MAC control frames (BAM, DPF, PMCF) */
2251 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2252 fctrl |= IXGBE_FCTRL_BAM;
2253 fctrl |= IXGBE_FCTRL_DPF;
2254 fctrl |= IXGBE_FCTRL_PMCF;
2255 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2258 * Configure CRC stripping, if requested.
2260 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2261 if (dev->data->dev_conf.rxmode.hw_strip_crc)
2262 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
2264 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
2267 * Configure jumbo frame support, if requested.
2269 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
2270 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
2271 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
2272 maxfrs &= 0x0000FFFF;
2273 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
2274 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
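/*
 * e.g. max_rx_pkt_len = 9000 puts 9000 into the upper 16 bits of MAXFRS
 * (the MFS field), so the MAC accepts frames of up to 9000 bytes.
 */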
2276 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
2278 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
2280 /* Setup RX queues */
2281 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2282 rxq = dev->data->rx_queues[i];
2284 /* Allocate buffers for descriptor rings */
2285 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
2290 * Reset crc_len in case it was changed after queue setup by a
2291 * call to configure.
2293 rxq->crc_len = (uint8_t)
2294 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
2297 /* Setup the Base and Length of the Rx Descriptor Rings */
2298 bus_addr = rxq->rx_ring_phys_addr;
2299 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
2300 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2301 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
2302 (uint32_t)(bus_addr >> 32));
2303 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2304 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
2305 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2306 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2308 /* Configure the SRRCTL register */
2309 #ifdef RTE_HEADER_SPLIT_ENABLE
2311 * Configure Header Split
2313 if (dev->data->dev_conf.rxmode.header_split) {
2314 if (hw->mac.type == ixgbe_mac_82599EB) {
2315 /* Must setup the PSRTYPE register */
2317 psrtype = IXGBE_PSRTYPE_TCPHDR |
2318 IXGBE_PSRTYPE_UDPHDR |
2319 IXGBE_PSRTYPE_IPV4HDR |
2320 IXGBE_PSRTYPE_IPV6HDR;
2321 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
2323 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
2324 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
2325 IXGBE_SRRCTL_BSIZEHDR_MASK);
2326 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2329 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2332 * Configure the RX buffer size in the BSIZEPACKET field of
2333 * the SRRCTL register of the queue.
2334 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
2337 mbp_priv = (struct rte_pktmbuf_pool_private *)
2338 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
2339 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
2340 RTE_PKTMBUF_HEADROOM);
2341 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
2342 IXGBE_SRRCTL_BSIZEPKT_MASK);
2343 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2345 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
2346 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
2347 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
2348 dev->data->scattered_rx = 1;
2349 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
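/*
 * Worked example (assuming the default 128-byte RTE_PKTMBUF_HEADROOM and a
 * 2048-byte mbuf data room): buf_size is 1920, and 1920 >>
 * IXGBE_SRRCTL_BSIZEPKT_SHIFT gives 1, i.e. a 1 KB hardware buffer size.
 * Read back as 1024 bytes, any max_rx_pkt_len above 1024 (e.g. 1518) enables
 * scattered RX on this port.
 */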
2354 * Configure RSS if device configured with multiple RX queues.
2356 if (hw->mac.type == ixgbe_mac_82599EB) {
2357 if (dev->data->nb_rx_queues > 1)
2358 switch (dev->data->dev_conf.rxmode.mq_mode) {
2360 ixgbe_rss_configure(dev);
2364 ixgbe_vmdq_dcb_configure(dev);
2367 default: ixgbe_rss_disable(dev);
2370 ixgbe_rss_disable(dev);
2374 * Setup the Checksum Register.
2375 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
2376 * Enable IP/L4 checksum computation by hardware if requested to do so.
2378 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2379 rxcsum |= IXGBE_RXCSUM_PCSD;
2380 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2381 rxcsum |= IXGBE_RXCSUM_IPPCSE;
2383 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
2385 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2387 if (hw->mac.type == ixgbe_mac_82599EB) {
2388 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2389 if (dev->data->dev_conf.rxmode.hw_strip_crc)
2390 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2392 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
2393 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2394 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2401 * Initializes Transmit Unit.
2404 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
2406 struct ixgbe_hw *hw;
2407 struct igb_tx_queue *txq;
2414 PMD_INIT_FUNC_TRACE();
2415 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2417 /* Enable TX CRC (checksum offload requirement) */
2418 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2419 hlreg0 |= IXGBE_HLREG0_TXCRCEN;
2420 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
2422 /* Setup the Base and Length of the Tx Descriptor Rings */
2423 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2424 txq = dev->data->tx_queues[i];
2426 bus_addr = txq->tx_ring_phys_addr;
2427 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
2428 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2429 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
2430 (uint32_t)(bus_addr >> 32));
2431 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2432 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
2433 /* Setup the HW Tx Head and TX Tail descriptor pointers */
2434 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2435 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2438 * Disable Tx Head Writeback RO bit, since this hoses
2439 * bookkeeping if things aren't delivered in order.
2441 switch (hw->mac.type) {
2442 case ixgbe_mac_82598EB:
2443 txctrl = IXGBE_READ_REG(hw,
2444 IXGBE_DCA_TXCTRL(i));
2445 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2446 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i),
2450 case ixgbe_mac_82599EB:
2451 case ixgbe_mac_X540:
2453 txctrl = IXGBE_READ_REG(hw,
2454 IXGBE_DCA_TXCTRL_82599(i));
2455 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2456 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
2462 if (hw->mac.type != ixgbe_mac_82598EB) {
2463 /* disable arbiter before setting MTQC */
2464 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2465 rttdcs |= IXGBE_RTTDCS_ARBDIS;
2466 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2468 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2470 /* re-enable arbiter */
2471 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2472 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2477 * Start Transmit and Receive Units.
2480 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
2482 struct ixgbe_hw *hw;
2483 struct igb_tx_queue *txq;
2484 struct igb_rx_queue *rxq;
2492 PMD_INIT_FUNC_TRACE();
2493 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2495 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2496 txq = dev->data->tx_queues[i];
2497 /* Setup Transmit Threshold Registers */
2498 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2499 txdctl |= txq->pthresh & 0x7F;
2500 txdctl |= ((txq->hthresh & 0x7F) << 8);
2501 txdctl |= ((txq->wthresh & 0x7F) << 16);
2502 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
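/*
 * Example encoding (hypothetical threshold values): pthresh = 36, hthresh = 0
 * and wthresh = 0 leave TXDCTL's low byte at 0x24 and its HTHRESH/WTHRESH
 * byte fields at zero.
 */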
2505 if (hw->mac.type != ixgbe_mac_82598EB) {
2506 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2507 dmatxctl |= IXGBE_DMATXCTL_TE;
2508 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2511 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2512 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2513 txdctl |= IXGBE_TXDCTL_ENABLE;
2514 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
2516 /* Wait until TX Enable ready */
2517 if (hw->mac.type == ixgbe_mac_82599EB) {
2521 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2522 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
2524 PMD_INIT_LOG(ERR, "Could not enable "
2525 "Tx Queue %d\n", i);
2528 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2529 rxq = dev->data->rx_queues[i];
2530 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2531 rxdctl |= IXGBE_RXDCTL_ENABLE;
2532 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
2534 /* Wait until RX Enable ready */
2538 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2539 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
2541 PMD_INIT_LOG(ERR, "Could not enable "
2542 "Rx Queue %d\n", i);
2544 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
2547 /* Enable Receive engine */
2548 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2549 if (hw->mac.type == ixgbe_mac_82598EB)
2550 rxctrl |= IXGBE_RXCTRL_DMBYPS;
2551 rxctrl |= IXGBE_RXCTRL_RXEN;
2552 hw->mac.ops.enable_rx_dma(hw, rxctrl);
2557 * [VF] Initializes Receive Unit.
2560 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
2562 struct ixgbe_hw *hw;
2563 struct igb_rx_queue *rxq;
2564 struct rte_pktmbuf_pool_private *mbp_priv;
2571 PMD_INIT_FUNC_TRACE();
2572 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2574 /* Setup RX queues */
2575 dev->rx_pkt_burst = ixgbe_recv_pkts;
2576 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2577 rxq = dev->data->rx_queues[i];
2579 /* Allocate buffers for descriptor rings */
2580 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
2584 /* Setup the Base and Length of the Rx Descriptor Rings */
2585 bus_addr = rxq->rx_ring_phys_addr;
2587 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
2588 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2589 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
2590 (uint32_t)(bus_addr >> 32));
2591 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
2592 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
2593 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
2594 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
2597 /* Configure the SRRCTL register */
2598 #ifdef RTE_HEADER_SPLIT_ENABLE
2600 * Configure Header Split
2602 if (dev->data->dev_conf.rxmode.header_split) {
2604 /* Must setup the PSRTYPE register */
2606 psrtype = IXGBE_PSRTYPE_TCPHDR |
2607 IXGBE_PSRTYPE_UDPHDR |
2608 IXGBE_PSRTYPE_IPV4HDR |
2609 IXGBE_PSRTYPE_IPV6HDR;
2611 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
2613 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
2614 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
2615 IXGBE_SRRCTL_BSIZEHDR_MASK);
2616 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2619 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2622 * Configure the RX buffer size in the BSIZEPACKET field of
2623 * the SRRCTL register of the queue.
2624 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
2627 mbp_priv = (struct rte_pktmbuf_pool_private *)
2628 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
2629 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
2630 RTE_PKTMBUF_HEADROOM);
2631 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
2632 IXGBE_SRRCTL_BSIZEPKT_MASK);
2635 * VF modification to write virtual function SRRCTL register
2637 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
2639 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
2640 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
2641 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
2642 dev->data->scattered_rx = 1;
2643 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
2650 * [VF] Initializes Transmit Unit.
2653 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
2655 struct ixgbe_hw *hw;
2656 struct igb_tx_queue *txq;
2661 PMD_INIT_FUNC_TRACE();
2662 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2664 /* Setup the Base and Length of the Tx Descriptor Rings */
2665 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2666 txq = dev->data->tx_queues[i];
2667 bus_addr = txq->tx_ring_phys_addr;
2668 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
2669 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2670 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
2671 (uint32_t)(bus_addr >> 32));
2672 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
2673 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
2674 /* Setup the HW Tx Head and TX Tail descriptor pointers */
2675 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
2676 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
2679 * Disable Tx Head Writeback RO bit, since this hoses
2680 * bookkeeping if things aren't delivered in order.
2682 txctrl = IXGBE_READ_REG(hw,
2683 IXGBE_VFDCA_TXCTRL(i));
2684 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2685 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
2691 * [VF] Start Transmit and Receive Units.
2694 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
2696 struct ixgbe_hw *hw;
2697 struct igb_tx_queue *txq;
2698 struct igb_rx_queue *rxq;
2704 PMD_INIT_FUNC_TRACE();
2705 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2707 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2708 txq = dev->data->tx_queues[i];
2709 /* Setup Transmit Threshold Registers */
2710 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2711 txdctl |= txq->pthresh & 0x7F;
2712 txdctl |= ((txq->hthresh & 0x7F) << 8);
2713 txdctl |= ((txq->wthresh & 0x7F) << 16);
2714 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
2717 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2719 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2720 txdctl |= IXGBE_TXDCTL_ENABLE;
2721 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
2724 /* Wait until TX Enable ready */
2727 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2728 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
2730 PMD_INIT_LOG(ERR, "Could not enable "
2731 "Tx Queue %d\n", i);
2733 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2735 rxq = dev->data->rx_queues[i];
2737 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
2738 rxdctl |= IXGBE_RXDCTL_ENABLE;
2739 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
2741 /* Wait until RX Enable ready */
2745 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
2746 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
2748 PMD_INIT_LOG(ERR, "Could not enable "
2749 "Rx Queue %d\n", i);
2751 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);