4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * version: DPDK.L.1.2.3-3
36 #include <sys/queue.h>
48 #include <rte_byteorder.h>
49 #include <rte_common.h>
50 #include <rte_cycles.h>
52 #include <rte_debug.h>
53 #include <rte_interrupts.h>
55 #include <rte_memory.h>
56 #include <rte_memzone.h>
57 #include <rte_launch.h>
58 #include <rte_tailq.h>
60 #include <rte_per_lcore.h>
61 #include <rte_lcore.h>
62 #include <rte_atomic.h>
63 #include <rte_branch_prediction.h>
65 #include <rte_mempool.h>
66 #include <rte_malloc.h>
68 #include <rte_ether.h>
69 #include <rte_ethdev.h>
70 #include <rte_prefetch.h>
74 #include <rte_string_fns.h>
75 #include <rte_errno.h>
77 #include "ixgbe_logs.h"
78 #include "ixgbe/ixgbe_api.h"
79 #include "ixgbe/ixgbe_vf.h"
80 #include "ixgbe_ethdev.h"
82 static inline struct rte_mbuf *
83 rte_rxmbuf_alloc(struct rte_mempool *mp)
87 m = __rte_mbuf_raw_alloc(mp);
88 __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
92 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
93 (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
94 (char *)(mb)->buf_addr))
96 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
97 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
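/*
 * Illustrative sketch (not part of the driver logic): the first macro yields the bus
 * address of the mbuf's current data pointer (buf_physaddr plus the offset of
 * pkt.data inside the buffer); the _DEFAULT variant assumes the data starts right
 * after the headroom, as is true for freshly allocated mbufs used to refill RX rings:
 *
 *	uint64_t dma_cur = RTE_MBUF_DATA_DMA_ADDR(mb);
 *	uint64_t dma_def = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb);
 */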
100 * Structure associated with each descriptor of the RX ring of a RX queue.
102 struct igb_rx_entry {
103 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
107 * Structure associated with each descriptor of the TX ring of a TX queue.
109 struct igb_tx_entry {
110 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
111 uint16_t next_id; /**< Index of next descriptor in ring. */
112 uint16_t last_id; /**< Index of last scattered descriptor. */
116 * Structure associated with each RX queue.
118 struct igb_rx_queue {
119 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
120 volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
121 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
122 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
123 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
124 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
125 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
126 uint16_t nb_rx_desc; /**< number of RX descriptors. */
127 uint16_t rx_tail; /**< current value of RDT register. */
128 uint16_t nb_rx_hold; /**< number of held free RX desc. */
129 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
130 uint16_t queue_id; /**< RX queue index. */
131 uint8_t port_id; /**< Device port identifier. */
132 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
136 * IXGBE CTX Constants
138 enum ixgbe_advctx_num {
139 IXGBE_CTX_0 = 0, /**< CTX0 */
140 IXGBE_CTX_1 = 1, /**< CTX1 */
141 IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
145 * Structure to check whether a new context descriptor needs to be built
147 struct ixgbe_advctx_info {
148 uint16_t flags; /**< ol_flags for context build. */
149 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
150 uint32_t vlan_macip_lens; /**< vlan, mac ip length. */
154 * Structure associated with each TX queue.
156 struct igb_tx_queue {
157 /** TX ring virtual address. */
158 volatile union ixgbe_adv_tx_desc *tx_ring;
159 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
160 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
161 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
162 uint16_t nb_tx_desc; /**< number of TX descriptors. */
163 uint16_t tx_tail; /**< current value of TDT reg. */
164 uint16_t tx_free_thresh;/**< minimum TX before freeing. */
165 /** Number of TX descriptors to use before RS bit is set. */
166 uint16_t tx_rs_thresh;
167 /** Number of TX descriptors used since RS bit was set. */
169 /** Index to last TX descriptor to have been cleaned. */
170 uint16_t last_desc_cleaned;
171 /** Total number of TX descriptors ready to be allocated. */
173 uint16_t queue_id; /**< TX queue index. */
174 uint8_t port_id; /**< Device port identifier. */
175 uint8_t pthresh; /**< Prefetch threshold register. */
176 uint8_t hthresh; /**< Host threshold register. */
177 uint8_t wthresh; /**< Write-back threshold reg. */
178 uint32_t ctx_curr; /**< Hardware context states. */
179 /** Hardware context0 history. */
180 struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
185 #define RTE_PMD_USE_PREFETCH
188 #ifdef RTE_PMD_USE_PREFETCH
190 * Prefetch a cache line into all cache levels.
192 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
194 #define rte_ixgbe_prefetch(p) do {} while(0)
197 #ifdef RTE_PMD_PACKET_PREFETCH
198 #define rte_packet_prefetch(p) rte_prefetch1(p)
200 #define rte_packet_prefetch(p) do {} while(0)
203 /*********************************************************************
 *
 *  TX functions
 *
207 **********************************************************************/
209 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
210 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
211 uint16_t ol_flags, uint32_t vlan_macip_lens)
213 uint32_t type_tucmd_mlhl;
214 uint32_t mss_l4len_idx;
218 ctx_idx = txq->ctx_curr;
222 if (ol_flags & PKT_TX_VLAN_PKT) {
223 cmp_mask |= TX_VLAN_CMP_MASK;
226 if (ol_flags & PKT_TX_IP_CKSUM) {
227 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
228 cmp_mask |= TX_MAC_LEN_CMP_MASK;
231 /* Specify which HW CTX to upload. */
232 mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
233 switch (ol_flags & PKT_TX_L4_MASK) {
234 case PKT_TX_UDP_CKSUM:
235 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
236 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
237 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
238 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
240 case PKT_TX_TCP_CKSUM:
241 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
242 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
243 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
244 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
246 case PKT_TX_SCTP_CKSUM:
247 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
248 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
249 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
250 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
253 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
254 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
258 txq->ctx_cache[ctx_idx].flags = ol_flags;
259 txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
260 txq->ctx_cache[ctx_idx].vlan_macip_lens = vlan_macip_lens & cmp_mask;
262 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
263 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
264 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
265 ctx_txd->seqnum_seed = 0;
269 * Check which hardware context can be used. Use the existing match
270 * or create a new context descriptor.
272 static inline uint32_t
273 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
274 uint32_t vlan_macip_lens)
276 /* If match with the current used context */
277 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
278 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
279 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
280 return txq->ctx_curr;
283 /* Check whether it matches the other cached context */
txq->ctx_curr ^= 1;
285 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
286 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
287 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
288 return txq->ctx_curr;
291 /* Neither cached context matches: a new context descriptor must be built */
292 return (IXGBE_CTX_NUM);
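/*
 * Worked example of the two-entry context cache above (a sketch; assumes ctx_curr
 * starts at 0): a burst alternating between two offload profiles A and B ends up
 * with A cached in CTX_0 and B in CTX_1, so after the first two packets neither
 * profile needs a new context descriptor. Only a third, different profile makes
 * this function return IXGBE_CTX_NUM, forcing ixgbe_set_xmit_ctx() to be called.
 */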
295 static inline uint32_t
296 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
298 static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
299 static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
302 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
303 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
307 static inline uint32_t
308 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
310 static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
311 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
314 /* Default RS bit threshold values */
315 #ifndef DEFAULT_TX_RS_THRESH
316 #define DEFAULT_TX_RS_THRESH 32
318 #ifndef DEFAULT_TX_FREE_THRESH
319 #define DEFAULT_TX_FREE_THRESH 32
322 /* Reset transmit descriptors after they have been used */
324 ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
326 struct igb_tx_entry *sw_ring = txq->sw_ring;
327 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
328 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
329 uint16_t nb_tx_desc = txq->nb_tx_desc;
330 uint16_t desc_to_clean_to;
331 uint16_t nb_tx_to_clean;
333 /* Determine the last descriptor needing to be cleaned */
334 desc_to_clean_to = last_desc_cleaned + txq->tx_rs_thresh;
335 if (desc_to_clean_to >= nb_tx_desc)
336 desc_to_clean_to = desc_to_clean_to - nb_tx_desc;
338 /* Check to make sure the last descriptor to clean is done */
339 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
340 if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD))
342 PMD_TX_FREE_LOG(DEBUG,
343 "TX descriptor %4u is not done"
344 "(port=%d queue=%d)",
346 txq->port_id, txq->queue_id);
347 /* Failed to clean any descriptors, better luck next time */
351 /* Figure out how many descriptors will be cleaned */
352 if (last_desc_cleaned > desc_to_clean_to)
353 nb_tx_to_clean = ((nb_tx_desc - last_desc_cleaned) +
356 nb_tx_to_clean = desc_to_clean_to - last_desc_cleaned;
358 PMD_TX_FREE_LOG(DEBUG,
359 "Cleaning %4u TX descriptors: %4u to %4u "
360 "(port=%d queue=%d)",
361 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
362 txq->port_id, txq->queue_id);
365 * The last descriptor to clean is done, so that means all the
366 * descriptors from the last descriptor that was cleaned
367 * up to the last descriptor with the RS bit set
368 * are done. Only reset the threshold descriptor.
370 txr[desc_to_clean_to].wb.status = 0;
372 /* Update the txq to reflect the last descriptor that was cleaned */
373 txq->last_desc_cleaned = desc_to_clean_to;
374 txq->nb_tx_free += nb_tx_to_clean;
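/*
 * Worked example of the wrap-around arithmetic above (illustrative values, ignoring
 * the adjustment to the software ring's last_id): with nb_tx_desc = 128,
 * last_desc_cleaned = 120 and tx_rs_thresh = 16, desc_to_clean_to wraps to
 * 120 + 16 - 128 = 8, and nb_tx_to_clean = (128 - 120) + 8 = 16 descriptors.
 */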
381 ixgbe_xmit_pkts(struct igb_tx_queue *txq, struct rte_mbuf **tx_pkts,
384 struct igb_tx_entry *sw_ring;
385 struct igb_tx_entry *txe, *txn;
386 volatile union ixgbe_adv_tx_desc *txr;
387 volatile union ixgbe_adv_tx_desc *txd;
388 struct rte_mbuf *tx_pkt;
389 struct rte_mbuf *m_seg;
390 uint64_t buf_dma_addr;
391 uint32_t olinfo_status;
392 uint32_t cmd_type_len;
401 uint32_t vlan_macip_lens;
405 sw_ring = txq->sw_ring;
407 tx_id = txq->tx_tail;
408 txe = &sw_ring[tx_id];
410 /* Determine if the descriptor ring needs to be cleaned. */
411 if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
412 ixgbe_xmit_cleanup(txq);
416 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
419 pkt_len = tx_pkt->pkt.pkt_len;
421 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
424 * Determine how many (if any) context descriptors
425 * are needed for offload functionality.
427 ol_flags = tx_pkt->ol_flags;
428 vlan_macip_lens = tx_pkt->pkt.vlan_tci << 16 |
429 tx_pkt->pkt.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT |
432 /* If hardware offload required */
433 tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
435 /* Decide whether a new context descriptor must be built or an existing one reused. */
436 ctx = what_advctx_update(txq, tx_ol_req, vlan_macip_lens);
437 /* Only allocate a context descriptor if required */
438 new_ctx = (ctx == IXGBE_CTX_NUM);
443 * Keep track of how many descriptors are used this loop
444 * This will always be the number of segments + the number of
445 * Context descriptors required to transmit the packet
447 nb_used = tx_pkt->pkt.nb_segs + new_ctx;
450 * The number of descriptors that must be allocated for a
451 * packet is the number of segments of that packet, plus 1
452 * Context Descriptor for the hardware offload, if any.
453 * Determine the last TX descriptor to allocate in the TX ring
454 * for the packet, starting from the current position (tx_id)
457 tx_last = (uint16_t) (tx_id + nb_used - 1);
460 if (tx_last >= txq->nb_tx_desc)
461 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
463 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
464 " tx_first=%u tx_last=%u\n",
465 (unsigned) txq->port_id,
466 (unsigned) txq->queue_id,
472 * Make sure there are enough TX descriptors available to
473 * transmit the entire packet.
474 * nb_used better be less than or equal to txq->tx_rs_thresh
476 if (nb_used > txq->nb_tx_free) {
477 PMD_TX_FREE_LOG(DEBUG,
478 "Not enough free TX descriptors "
479 "nb_used=%4u nb_free=%4u "
480 "(port=%d queue=%d)",
481 nb_used, txq->nb_tx_free,
482 txq->port_id, txq->queue_id);
484 if (ixgbe_xmit_cleanup(txq) != 0) {
485 /* Could not clean any descriptors */
491 /* nb_used better be <= txq->tx_rs_thresh */
492 if (unlikely(nb_used > txq->tx_rs_thresh)) {
493 PMD_TX_FREE_LOG(DEBUG,
494 "The number of descriptors needed to "
495 "transmit the packet exceeds the "
496 "RS bit threshold. This will impact "
498 "nb_used=%4u nb_free=%4u "
500 "(port=%d queue=%d)",
501 nb_used, txq->nb_tx_free,
503 txq->port_id, txq->queue_id);
505 * Loop here until there are enough TX
506 * descriptors or until the ring cannot be
509 while (nb_used > txq->nb_tx_free) {
510 if (ixgbe_xmit_cleanup(txq) != 0) {
512 * Could not clean any
524 * By now there are enough free TX descriptors to transmit
529 * Set common flags of all TX Data Descriptors.
531 * The following bits must be set in all Data Descriptors:
532 * - IXGBE_ADVTXD_DTYP_DATA
533 * - IXGBE_ADVTXD_DCMD_DEXT
535 * The following bits must be set in the first Data Descriptor
536 * and are ignored in the other ones:
537 * - IXGBE_ADVTXD_DCMD_IFCS
538 * - IXGBE_ADVTXD_MAC_1588
539 * - IXGBE_ADVTXD_DCMD_VLE
541 * The following bits must only be set in the last Data
543 * - IXGBE_TXD_CMD_EOP
545 * The following bits can be set in any Data Descriptor, but
546 * are only set in the last Data Descriptor:
549 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
550 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
551 olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
552 #ifdef RTE_LIBRTE_IEEE1588
553 if (ol_flags & PKT_TX_IEEE1588_TMST)
554 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
559 * Setup the TX Advanced Context Descriptor if required
562 volatile struct ixgbe_adv_tx_context_desc *
565 ctx_txd = (volatile struct
566 ixgbe_adv_tx_context_desc *)
569 txn = &sw_ring[txe->next_id];
570 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
572 if (txe->mbuf != NULL) {
573 rte_pktmbuf_free_seg(txe->mbuf);
577 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
580 txe->last_id = tx_last;
581 tx_id = txe->next_id;
586 * Setup the TX Advanced Data Descriptor,
587 * This path is taken whether a new context descriptor
588 * was built or an existing one is reused.
590 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
591 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
592 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
598 txn = &sw_ring[txe->next_id];
600 if (txe->mbuf != NULL)
601 rte_pktmbuf_free_seg(txe->mbuf);
605 * Set up Transmit Data Descriptor.
607 slen = m_seg->pkt.data_len;
608 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
609 txd->read.buffer_addr =
610 rte_cpu_to_le_64(buf_dma_addr);
611 txd->read.cmd_type_len =
612 rte_cpu_to_le_32(cmd_type_len | slen);
613 txd->read.olinfo_status =
614 rte_cpu_to_le_32(olinfo_status);
615 txe->last_id = tx_last;
616 tx_id = txe->next_id;
618 m_seg = m_seg->pkt.next;
619 } while (m_seg != NULL);
622 * The last packet data descriptor needs End Of Packet (EOP)
624 cmd_type_len |= IXGBE_TXD_CMD_EOP;
625 txq->nb_tx_used += nb_used;
626 txq->nb_tx_free -= nb_used;
628 /* Set RS bit only on threshold packets' last descriptor */
629 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
630 PMD_TX_FREE_LOG(DEBUG,
631 "Setting RS bit on TXD id="
632 "%4u (port=%d queue=%d)",
633 tx_last, txq->port_id, txq->queue_id);
635 cmd_type_len |= IXGBE_TXD_CMD_RS;
637 /* Update txq RS bit counters */
640 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
646 * Set the Transmit Descriptor Tail (TDT)
648 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
649 (unsigned) txq->port_id, (unsigned) txq->queue_id,
650 (unsigned) tx_id, (unsigned) nb_tx);
651 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
652 txq->tx_tail = tx_id;
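/*
 * How this is typically driven from an application (illustrative sketch; the burst
 * function is reached through the ethdev API once installed as dev->tx_pkt_burst,
 * and port_id/queue_id/pkts below are placeholders):
 *
 *	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *	if (nb_sent < nb_pkts) {
 *		... packets not accepted remain owned by the caller ...
 *	}
 */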
657 /*********************************************************************
 *
 *  RX functions
 *
661 **********************************************************************/
662 static inline uint16_t
663 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
667 static uint16_t ip_pkt_types_map[16] = {
668 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
669 PKT_RX_IPV6_HDR, 0, 0, 0,
670 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
671 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
674 static uint16_t ip_rss_types_map[16] = {
675 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
676 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
677 PKT_RX_RSS_HASH, 0, 0, 0,
678 0, 0, 0, PKT_RX_FDIR,
681 #ifdef RTE_LIBRTE_IEEE1588
682 static uint32_t ip_pkt_etqf_map[8] = {
683 0, 0, 0, PKT_RX_IEEE1588_PTP,
687 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
688 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
689 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
691 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
692 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
695 return (pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
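/*
 * As implemented above: the low 4 bits of the descriptor's hlen_type_rss word select
 * the RSS type, bits 7:4 select the packet type (or, when IXGBE_RXDADV_PKTTYPE_ETQF
 * is set and IEEE1588 support is compiled in, the ETQF index), and both are mapped
 * to mbuf ol_flags through the static lookup tables.
 */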
698 static inline uint16_t
699 rx_desc_status_to_pkt_flags(uint32_t rx_status)
704 * Check only whether a VLAN tag is present.
705 * Do not check whether the L3/L4 RX checksum was computed by the NIC;
706 * that can be determined from the rte_eth_rxmode.hw_ip_checksum flag.
708 pkt_flags = (uint16_t) (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
710 #ifdef RTE_LIBRTE_IEEE1588
711 if (rx_status & IXGBE_RXD_STAT_TMST)
712 pkt_flags = (pkt_flags | PKT_RX_IEEE1588_TMST);
717 static inline uint16_t
718 rx_desc_error_to_pkt_flags(uint32_t rx_status)
721 * Bit 31: IPE, IPv4 checksum error
722 * Bit 30: L4I, L4 integrity error
724 static uint16_t error_to_pkt_flags_map[4] = {
725 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
726 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
728 return error_to_pkt_flags_map[(rx_status >>
729 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
733 ixgbe_recv_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
736 volatile union ixgbe_adv_rx_desc *rx_ring;
737 volatile union ixgbe_adv_rx_desc *rxdp;
738 struct igb_rx_entry *sw_ring;
739 struct igb_rx_entry *rxe;
740 struct rte_mbuf *rxm;
741 struct rte_mbuf *nmb;
742 union ixgbe_adv_rx_desc rxd;
745 uint32_t hlen_type_rss;
754 rx_id = rxq->rx_tail;
755 rx_ring = rxq->rx_ring;
756 sw_ring = rxq->sw_ring;
757 while (nb_rx < nb_pkts) {
759 * The order of operations here is important as the DD status
760 * bit must not be read after any other descriptor fields.
761 * rx_ring and rxdp are pointing to volatile data so the order
762 * of accesses cannot be reordered by the compiler. If they were
763 * not volatile, they could be reordered which could lead to
764 * using invalid descriptor fields when read from rxd.
766 rxdp = &rx_ring[rx_id];
767 staterr = rxdp->wb.upper.status_error;
768 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
775 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
776 * is likely to be invalid and to be dropped by the various
777 * validation checks performed by the network stack.
779 * Allocate a new mbuf to replenish the RX ring descriptor.
780 * If the allocation fails:
781 * - arrange for that RX descriptor to be the first one
782 * being parsed the next time the receive function is
783 * invoked [on the same queue].
785 * - Stop parsing the RX ring and return immediately.
787 * This policy does not drop the packet received in the RX
788 * descriptor for which the allocation of a new mbuf failed.
789 * Thus, it allows that packet to be retrieved later if
790 * mbufs have been freed in the meantime.
791 * As a side effect, holding RX descriptors instead of
792 * systematically giving them back to the NIC may lead to
793 * RX ring exhaustion situations.
794 * However, the NIC can gracefully prevent such situations
795 * to happen by sending specific "back-pressure" flow control
796 * frames to its peer(s).
798 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
799 "ext_err_stat=0x%08x pkt_len=%u\n",
800 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
801 (unsigned) rx_id, (unsigned) staterr,
802 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
804 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
806 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
807 "queue_id=%u\n", (unsigned) rxq->port_id,
808 (unsigned) rxq->queue_id);
809 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
814 rxe = &sw_ring[rx_id];
816 if (rx_id == rxq->nb_rx_desc)
819 /* Prefetch next mbuf while processing current one. */
820 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
823 * When next RX descriptor is on a cache-line boundary,
824 * prefetch the next 4 RX descriptors and the next 8 pointers
827 if ((rx_id & 0x3) == 0) {
828 rte_ixgbe_prefetch(&rx_ring[rx_id]);
829 rte_ixgbe_prefetch(&sw_ring[rx_id]);
835 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
836 rxdp->read.hdr_addr = dma_addr;
837 rxdp->read.pkt_addr = dma_addr;
840 * Initialize the returned mbuf.
841 * 1) setup generic mbuf fields:
842 * - number of segments,
845 * - RX port identifier.
846 * 2) integrate hardware offload data, if any:
848 * - IP checksum flag,
849 * - VLAN TCI, if any,
852 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
854 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
855 rte_packet_prefetch(rxm->pkt.data);
856 rxm->pkt.nb_segs = 1;
857 rxm->pkt.next = NULL;
858 rxm->pkt.pkt_len = pkt_len;
859 rxm->pkt.data_len = pkt_len;
860 rxm->pkt.in_port = rxq->port_id;
862 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
863 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
864 rxm->pkt.vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
866 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
867 pkt_flags = (pkt_flags | rx_desc_status_to_pkt_flags(staterr));
868 pkt_flags = (pkt_flags | rx_desc_error_to_pkt_flags(staterr));
869 rxm->ol_flags = pkt_flags;
871 if (likely(pkt_flags & PKT_RX_RSS_HASH))
872 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
873 else if (pkt_flags & PKT_RX_FDIR) {
874 rxm->pkt.hash.fdir.hash =
875 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
876 & IXGBE_ATR_HASH_MASK);
877 rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
880 * Store the mbuf address into the next entry of the array
881 * of returned packets.
883 rx_pkts[nb_rx++] = rxm;
885 rxq->rx_tail = rx_id;
888 * If the number of free RX descriptors is greater than the RX free
889 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
891 * Update the RDT with the value of the last processed RX descriptor
892 * minus 1, to guarantee that the RDT register is never equal to the
893 * RDH register, which creates a "full" ring situation from the
894 * hardware point of view...
896 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
897 if (nb_hold > rxq->rx_free_thresh) {
898 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
899 "nb_hold=%u nb_rx=%u\n",
900 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
901 (unsigned) rx_id, (unsigned) nb_hold,
903 rx_id = (uint16_t) ((rx_id == 0) ?
904 (rxq->nb_rx_desc - 1) : (rx_id - 1));
905 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
908 rxq->nb_rx_hold = nb_hold;
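/*
 * Typical receive loop in an application (illustrative sketch; this burst function
 * is reached through the ethdev API once installed as dev->rx_pkt_burst, and
 * port_id/queue_id below are placeholders):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t i, nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *	for (i = 0; i < nb; i++) {
 *		... process pkts[i], then rte_pktmbuf_free(pkts[i]) ...
 *	}
 */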
913 ixgbe_recv_scattered_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
916 volatile union ixgbe_adv_rx_desc *rx_ring;
917 volatile union ixgbe_adv_rx_desc *rxdp;
918 struct igb_rx_entry *sw_ring;
919 struct igb_rx_entry *rxe;
920 struct rte_mbuf *first_seg;
921 struct rte_mbuf *last_seg;
922 struct rte_mbuf *rxm;
923 struct rte_mbuf *nmb;
924 union ixgbe_adv_rx_desc rxd;
925 uint64_t dma; /* Physical address of mbuf data buffer */
927 uint32_t hlen_type_rss;
936 rx_id = rxq->rx_tail;
937 rx_ring = rxq->rx_ring;
938 sw_ring = rxq->sw_ring;
941 * Retrieve RX context of current packet, if any.
943 first_seg = rxq->pkt_first_seg;
944 last_seg = rxq->pkt_last_seg;
946 while (nb_rx < nb_pkts) {
949 * The order of operations here is important as the DD status
950 * bit must not be read after any other descriptor fields.
951 * rx_ring and rxdp are pointing to volatile data so the order
952 * of accesses cannot be reordered by the compiler. If they were
953 * not volatile, they could be reordered which could lead to
954 * using invalid descriptor fields when read from rxd.
956 rxdp = &rx_ring[rx_id];
957 staterr = rxdp->wb.upper.status_error;
958 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
965 * Allocate a new mbuf to replenish the RX ring descriptor.
966 * If the allocation fails:
967 * - arrange for that RX descriptor to be the first one
968 * being parsed the next time the receive function is
969 * invoked [on the same queue].
971 * - Stop parsing the RX ring and return immediately.
973 * This policy does not drop the packet received in the RX
974 * descriptor for which the allocation of a new mbuf failed.
975 * Thus, it allows that packet to be retrieved later if
976 * mbufs have been freed in the meantime.
977 * As a side effect, holding RX descriptors instead of
978 * systematically giving them back to the NIC may lead to
979 * RX ring exhaustion situations.
980 * However, the NIC can gracefully prevent such situations
981 * to happen by sending specific "back-pressure" flow control
982 * frames to its peer(s).
984 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
985 "staterr=0x%x data_len=%u\n",
986 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
987 (unsigned) rx_id, (unsigned) staterr,
988 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
990 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
992 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
993 "queue_id=%u\n", (unsigned) rxq->port_id,
994 (unsigned) rxq->queue_id);
995 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1000 rxe = &sw_ring[rx_id];
1002 if (rx_id == rxq->nb_rx_desc)
1005 /* Prefetch next mbuf while processing current one. */
1006 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1009 * When next RX descriptor is on a cache-line boundary,
1010 * prefetch the next 4 RX descriptors and the next 8 pointers
1013 if ((rx_id & 0x3) == 0) {
1014 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1015 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1019 * Update RX descriptor with the physical address of the new
1020 * data buffer of the new allocated mbuf.
1024 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1025 rxdp->read.hdr_addr = dma;
1026 rxdp->read.pkt_addr = dma;
1029 * Set data length & data buffer address of mbuf.
1031 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1032 rxm->pkt.data_len = data_len;
1033 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1036 * If this is the first buffer of the received packet,
1037 * set the pointer to the first mbuf of the packet and
1038 * initialize its context.
1039 * Otherwise, update the total length and the number of segments
1040 * of the current scattered packet, and update the pointer to
1041 * the last mbuf of the current packet.
1043 if (first_seg == NULL) {
1045 first_seg->pkt.pkt_len = data_len;
1046 first_seg->pkt.nb_segs = 1;
1048 first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
1050 first_seg->pkt.nb_segs++;
1051 last_seg->pkt.next = rxm;
1055 * If this is not the last buffer of the received packet,
1056 * update the pointer to the last mbuf of the current scattered
1057 * packet and continue to parse the RX ring.
1059 if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
1065 * This is the last buffer of the received packet.
1066 * If the CRC is not stripped by the hardware:
1067 * - Subtract the CRC length from the total packet length.
1068 * - If the last buffer only contains the whole CRC or a part
1069 * of it, free the mbuf associated to the last buffer.
1070 * If part of the CRC is also contained in the previous
1071 * mbuf, subtract the length of that CRC part from the
1072 * data length of the previous mbuf.
1074 rxm->pkt.next = NULL;
1075 if (unlikely(rxq->crc_len > 0)) {
1076 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
1077 if (data_len <= ETHER_CRC_LEN) {
1078 rte_pktmbuf_free_seg(rxm);
1079 first_seg->pkt.nb_segs--;
1080 last_seg->pkt.data_len = (uint16_t)
1081 (last_seg->pkt.data_len -
1082 (ETHER_CRC_LEN - data_len));
1083 last_seg->pkt.next = NULL;
1086 (uint16_t) (data_len - ETHER_CRC_LEN);
1090 * Initialize the first mbuf of the returned packet:
1091 * - RX port identifier,
1092 * - hardware offload data, if any:
1093 * - RSS flag & hash,
1094 * - IP checksum flag,
1095 * - VLAN TCI, if any,
1098 first_seg->pkt.in_port = rxq->port_id;
1101 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1102 * set in the pkt_flags field.
1104 first_seg->pkt.vlan_tci =
1105 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1106 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1107 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1108 pkt_flags = (pkt_flags |
1109 rx_desc_status_to_pkt_flags(staterr));
1110 pkt_flags = (pkt_flags |
1111 rx_desc_error_to_pkt_flags(staterr));
1112 first_seg->ol_flags = pkt_flags;
1114 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1115 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1116 else if (pkt_flags & PKT_RX_FDIR) {
1117 first_seg->pkt.hash.fdir.hash =
1118 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1119 & IXGBE_ATR_HASH_MASK);
1120 first_seg->pkt.hash.fdir.id =
1121 rxd.wb.lower.hi_dword.csum_ip.ip_id;
1124 /* Prefetch data of first segment, if configured to do so. */
1125 rte_packet_prefetch(first_seg->pkt.data);
1128 * Store the mbuf address into the next entry of the array
1129 * of returned packets.
1131 rx_pkts[nb_rx++] = first_seg;
1134 * Setup receipt context for a new packet.
1140 * Record index of the next RX descriptor to probe.
1142 rxq->rx_tail = rx_id;
1145 * Save receive context.
1147 rxq->pkt_first_seg = first_seg;
1148 rxq->pkt_last_seg = last_seg;
1151 * If the number of free RX descriptors is greater than the RX free
1152 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1154 * Update the RDT with the value of the last processed RX descriptor
1155 * minus 1, to guarantee that the RDT register is never equal to the
1156 * RDH register, which creates a "full" ring situation from the
1157 * hardware point of view...
1159 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1160 if (nb_hold > rxq->rx_free_thresh) {
1161 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1162 "nb_hold=%u nb_rx=%u\n",
1163 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1164 (unsigned) rx_id, (unsigned) nb_hold,
1166 rx_id = (uint16_t) ((rx_id == 0) ?
1167 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1168 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1171 rxq->nb_rx_hold = nb_hold;
1175 /*********************************************************************
1177 * Queue management functions
1179 **********************************************************************/
1182 * Rings setup and release.
1184 * TDBA/RDBA need only be aligned on a 16-byte boundary, but TDLEN/RDLEN must be a
1185 * multiple of 128 bytes. We therefore align TDBA/RDBA on a 128-byte boundary, which
1186 * also optimizes the cache-line effect: the H/W supports cache lines of up to 128 bytes.
1188 #define IXGBE_ALIGN 128
1191 * Maximum number of Ring Descriptors.
1193 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1194 * descriptors should meet the following condition:
1195 * (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1197 #define IXGBE_MIN_RING_DESC 64
1198 #define IXGBE_MAX_RING_DESC 4096
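/*
 * Since an advanced RX/TX descriptor is 16 bytes, the 128-byte constraint means the
 * ring size must be a multiple of 8 descriptors; for example, 64 * 16 = 1024 and
 * 4096 * 16 = 65536 bytes are both multiples of 128, so the bounds above qualify.
 */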
1201 * Create memzone for HW rings. malloc can't be used as the physical address is
1202 * needed. If the memzone is already created, then this function returns a ptr
1205 static const struct rte_memzone *
1206 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1207 uint16_t queue_id, uint32_t ring_size, int socket_id)
1209 char z_name[RTE_MEMZONE_NAMESIZE];
1210 const struct rte_memzone *mz;
1212 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1213 dev->driver->pci_drv.name, ring_name,
1214 dev->data->port_id, queue_id);
1216 mz = rte_memzone_lookup(z_name);
1220 return rte_memzone_reserve_aligned(z_name, (uint64_t) ring_size,
1221 socket_id, 0, IXGBE_ALIGN);
1225 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1229 if (txq->sw_ring != NULL) {
1230 for (i = 0; i < txq->nb_tx_desc; i++) {
1231 if (txq->sw_ring[i].mbuf != NULL) {
1232 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1233 txq->sw_ring[i].mbuf = NULL;
1240 ixgbe_tx_queue_release(struct igb_tx_queue *txq)
1242 ixgbe_tx_queue_release_mbufs(txq);
1243 rte_free(txq->sw_ring);
1248 ixgbe_dev_tx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1250 uint16_t old_nb_queues = dev->data->nb_tx_queues;
1251 struct igb_tx_queue **txq;
1254 PMD_INIT_FUNC_TRACE();
1256 if (dev->data->tx_queues == NULL) {
1257 dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
1258 sizeof(struct igb_tx_queue *) * nb_queues,
1260 if (dev->data->tx_queues == NULL) {
1261 dev->data->nb_tx_queues = 0;
1266 for (i = nb_queues; i < old_nb_queues; i++)
1267 ixgbe_tx_queue_release(dev->data->tx_queues[i]);
1268 txq = rte_realloc(dev->data->tx_queues,
1269 sizeof(struct igb_tx_queue *) * nb_queues,
1274 dev->data->tx_queues = txq;
1275 if (nb_queues > old_nb_queues)
1276 memset(&dev->data->tx_queues[old_nb_queues], 0,
1277 sizeof(struct igb_tx_queue *) *
1278 (nb_queues - old_nb_queues));
1280 dev->data->nb_tx_queues = nb_queues;
1284 /* (Re)set dynamic igb_tx_queue fields to defaults */
1286 ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
1288 struct igb_tx_entry *txe = txq->sw_ring;
1291 /* Zero out HW ring memory */
1292 for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
1293 ((volatile char *)txq->tx_ring)[i] = 0;
1296 /* Initialize SW ring entries */
1297 prev = (uint16_t) (txq->nb_tx_desc - 1);
1298 for (i = 0; i < txq->nb_tx_desc; i++) {
1299 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1300 txd->wb.status = IXGBE_TXD_STAT_DD;
1303 txe[prev].next_id = i;
1308 txq->nb_tx_used = 0;
1310 * Always allow 1 descriptor to be un-allocated to avoid
1311 * a H/W race condition
1313 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1314 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1316 memset((void*)&txq->ctx_cache, 0,
1317 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
1321 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1324 unsigned int socket_id,
1325 const struct rte_eth_txconf *tx_conf)
1327 const struct rte_memzone *tz;
1328 struct igb_tx_queue *txq;
1329 struct ixgbe_hw *hw;
1330 uint16_t tx_rs_thresh, tx_free_thresh;
1332 PMD_INIT_FUNC_TRACE();
1333 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1336 * Validate number of transmit descriptors.
1337 * It must not exceed hardware maximum, and must be multiple
1340 if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
1341 (nb_desc > IXGBE_MAX_RING_DESC) ||
1342 (nb_desc < IXGBE_MIN_RING_DESC)) {
1347 * The following two parameters control the setting of the RS bit on
1348 * transmit descriptors.
1349 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1350 * descriptors have been used.
1351 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1352 * descriptors are used or if the number of descriptors required
1353 * to transmit a packet is greater than the number of free TX
1355 * The following constraints must be satisfied:
1356 * tx_rs_thresh must be greater than 0.
1357 * tx_rs_thresh must be less than the size of the ring minus 2.
1358 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1359 * tx_free_thresh must be greater than 0.
1360 * tx_free_thresh must be less than the size of the ring minus 3.
1361 * One descriptor in the TX ring is used as a sentinel to avoid a
1362 * H/W race condition, hence the maximum threshold constraints.
1363 * When set to zero use default values.
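 * Illustrative example (not a tuning recommendation): with nb_desc = 512 and the
 * defaults tx_rs_thresh = tx_free_thresh = 32, the RS bit is requested once at
 * least 32 descriptors have been used since the last RS-marked descriptor, and
 * ixgbe_xmit_cleanup() runs once more than 32 of the 511 usable descriptors
 * (one is kept as a sentinel) are in flight.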
1365 tx_rs_thresh = (tx_conf->tx_rs_thresh) ?
1366 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH;
1367 tx_free_thresh = (tx_conf->tx_free_thresh) ?
1368 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
1369 if (tx_rs_thresh >= (nb_desc - 2)) {
1371 "tx_rs_thresh must be less than the "
1372 "number of TX descriptors minus 2. "
1373 "(tx_rs_thresh=%u port=%d queue=%d)",
1374 tx_rs_thresh, dev->data->port_id, queue_idx);
1377 if (tx_free_thresh >= (nb_desc - 3)) {
1379 "tx_rs_thresh must be less than the "
1380 "tx_free_thresh must be less than the "
1381 "number of TX descriptors minus 3. "
1382 "(tx_free_thresh=%u port=%d queue=%d)",
1383 tx_free_thresh, dev->data->port_id, queue_idx);
1386 if (tx_rs_thresh > tx_free_thresh) {
1388 "tx_rs_thresh must be less than or equal to "
1390 "(tx_free_thresh=%u tx_rs_thresh=%u "
1391 "port=%d queue=%d)",
1392 tx_free_thresh, tx_rs_thresh,
1393 dev->data->port_id, queue_idx);
1398 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
1399 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
1400 * by the NIC and all descriptors are written back after the NIC
1401 * accumulates WTHRESH descriptors.
1403 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
1405 "TX WTHRESH should be set to 0 if "
1406 "tx_rs_thresh is greater than 1. "
1407 "TX WTHRESH will be set to 0. "
1408 "(tx_rs_thresh=%u port=%d queue=%d)",
1410 dev->data->port_id, queue_idx);
1414 /* Free memory prior to re-allocation if needed... */
1415 if (dev->data->tx_queues[queue_idx] != NULL)
1416 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
1418 /* First allocate the tx queue data structure */
1419 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1425 * Allocate TX ring hardware descriptors. A memzone large enough to
1426 * handle the maximum ring size is allocated in order to allow for
1427 * resizing in later calls to the queue setup function.
1429 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1430 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
1433 ixgbe_tx_queue_release(txq);
1437 txq->nb_tx_desc = nb_desc;
1438 txq->tx_rs_thresh = tx_rs_thresh;
1439 txq->tx_free_thresh = tx_free_thresh;
1440 txq->pthresh = tx_conf->tx_thresh.pthresh;
1441 txq->hthresh = tx_conf->tx_thresh.hthresh;
1442 txq->wthresh = tx_conf->tx_thresh.wthresh;
1443 txq->queue_id = queue_idx;
1444 txq->port_id = dev->data->port_id;
1447 * Modification to set VFTDT for virtual function if vf is detected
1449 if (hw->mac.type == ixgbe_mac_82599_vf)
1450 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
1452 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));
1454 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1455 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
1457 /* Allocate software ring */
1458 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1459 sizeof(struct igb_tx_entry) * nb_desc,
1461 if (txq->sw_ring == NULL) {
1462 ixgbe_tx_queue_release(txq);
1465 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1466 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1468 ixgbe_reset_tx_queue(txq);
1470 dev->data->tx_queues[queue_idx] = txq;
1472 dev->tx_pkt_burst = ixgbe_xmit_pkts;
1478 ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1482 if (rxq->sw_ring != NULL) {
1483 for (i = 0; i < rxq->nb_rx_desc; i++) {
1484 if (rxq->sw_ring[i].mbuf != NULL) {
1485 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1486 rxq->sw_ring[i].mbuf = NULL;
1493 ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
1495 ixgbe_rx_queue_release_mbufs(rxq);
1496 rte_free(rxq->sw_ring);
1501 ixgbe_dev_rx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1503 uint16_t old_nb_queues = dev->data->nb_rx_queues;
1504 struct igb_rx_queue **rxq;
1507 PMD_INIT_FUNC_TRACE();
1509 if (dev->data->rx_queues == NULL) {
1510 dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
1511 sizeof(struct igb_rx_queue *) * nb_queues,
1513 if (dev->data->rx_queues == NULL) {
1514 dev->data->nb_rx_queues = 0;
1519 for (i = nb_queues; i < old_nb_queues; i++)
1520 ixgbe_rx_queue_release(dev->data->rx_queues[i]);
1521 rxq = rte_realloc(dev->data->rx_queues,
1522 sizeof(struct igb_rx_queue *) * nb_queues,
1527 dev->data->rx_queues = rxq;
1528 if (nb_queues > old_nb_queues)
1529 memset(&dev->data->rx_queues[old_nb_queues], 0,
1530 sizeof(struct igb_rx_queue *) *
1531 (nb_queues - old_nb_queues));
1533 dev->data->nb_rx_queues = nb_queues;
1537 /* (Re)set dynamic igb_rx_queue fields to defaults */
1539 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
1543 /* Zero out HW ring memory */
1544 for (i = 0; i < rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc); i++) {
1545 ((volatile char *)rxq->rx_ring)[i] = 0;
1549 rxq->nb_rx_hold = 0;
1550 rxq->pkt_first_seg = NULL;
1551 rxq->pkt_last_seg = NULL;
1555 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
1558 unsigned int socket_id,
1559 const struct rte_eth_rxconf *rx_conf,
1560 struct rte_mempool *mp)
1562 const struct rte_memzone *rz;
1563 struct igb_rx_queue *rxq;
1564 struct ixgbe_hw *hw;
1566 PMD_INIT_FUNC_TRACE();
1567 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1570 * Validate number of receive descriptors.
1571 * It must not exceed hardware maximum, and must be multiple
1574 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
1575 (nb_desc > IXGBE_MAX_RING_DESC) ||
1576 (nb_desc < IXGBE_MIN_RING_DESC)) {
1580 /* Free memory prior to re-allocation if needed... */
1581 if (dev->data->rx_queues[queue_idx] != NULL)
1582 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
1584 /* First allocate the rx queue data structure */
1585 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1590 rxq->nb_rx_desc = nb_desc;
1591 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1592 rxq->queue_id = queue_idx;
1593 rxq->port_id = dev->data->port_id;
1594 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1598 * Allocate RX ring hardware descriptors. A memzone large enough to
1599 * handle the maximum ring size is allocated in order to allow for
1600 * resizing in later calls to the queue setup function.
1602 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
1603 IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
1606 ixgbe_rx_queue_release(rxq);
1610 * Modified to setup VFRDT for Virtual Function
1612 if (hw->mac.type == ixgbe_mac_82599_vf)
1613 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
1615 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));
1617 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
1618 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
1620 /* Allocate software ring */
1621 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1622 sizeof(struct igb_rx_entry) * nb_desc,
1624 if (rxq->sw_ring == NULL) {
1625 ixgbe_rx_queue_release(rxq);
1628 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1629 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1631 dev->data->rx_queues[queue_idx] = rxq;
1633 ixgbe_reset_rx_queue(rxq);
1639 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
1643 PMD_INIT_FUNC_TRACE();
1645 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1646 struct igb_tx_queue *txq = dev->data->tx_queues[i];
1647 ixgbe_tx_queue_release_mbufs(txq);
1648 ixgbe_reset_tx_queue(txq);
1651 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1652 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
1653 ixgbe_rx_queue_release_mbufs(rxq);
1654 ixgbe_reset_rx_queue(rxq);
1658 /*********************************************************************
1660 * Device RX/TX init functions
1662 **********************************************************************/
1665 * Receive Side Scaling (RSS)
1666 * See section 7.1.2.8 in the following document:
1667 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
1670 * The source and destination IP addresses of the IP header and the source
1671 * and destination ports of TCP/UDP headers, if any, of received packets are
1672 * hashed against a configurable random key to compute a 32-bit RSS hash result.
1673 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1674 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1675 * RSS output index which is used as the RX queue index where to store the
1677 * The following output is supplied in the RX write-back descriptor:
1678 * - 32-bit result of the Microsoft RSS hash function,
1679 * - 4-bit RSS type field.
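 *
 * Illustrative dispatch example: with 4 RX queues configured, the code below fills
 * the 128-entry RETA with the repeating pattern 0,1,2,3, so a received packet is
 * steered to queue RETA[rss_hash & 0x7F].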
1683 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
1684 * Used as the default key.
1686 static uint8_t rss_intel_key[40] = {
1687 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1688 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1689 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1690 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1691 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1695 ixgbe_rss_disable(struct rte_eth_dev *dev)
1697 struct ixgbe_hw *hw;
1700 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1701 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
1702 mrqc &= ~IXGBE_MRQC_RSSEN;
1703 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
1707 ixgbe_rss_configure(struct rte_eth_dev *dev)
1709 struct ixgbe_hw *hw;
1718 PMD_INIT_FUNC_TRACE();
1719 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1721 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1722 if (rss_hf == 0) { /* Disable RSS */
1723 ixgbe_rss_disable(dev);
1726 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1727 if (hash_key == NULL)
1728 hash_key = rss_intel_key; /* Default hash key */
1730 /* Fill in RSS hash key */
1731 for (i = 0; i < 10; i++) {
1732 rss_key = hash_key[(i * 4)];
1733 rss_key |= hash_key[(i * 4) + 1] << 8;
1734 rss_key |= hash_key[(i * 4) + 2] << 16;
1735 rss_key |= hash_key[(i * 4) + 3] << 24;
1736 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
1739 /* Fill in redirection table */
1741 for (i = 0, j = 0; i < 128; i++, j++) {
1742 if (j == dev->data->nb_rx_queues) j = 0;
1743 reta = (reta << 8) | j;
1745 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
1748 /* Set configured hashing functions in MRQC register */
1749 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
1750 if (rss_hf & ETH_RSS_IPV4)
1751 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
1752 if (rss_hf & ETH_RSS_IPV4_TCP)
1753 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
1754 if (rss_hf & ETH_RSS_IPV6)
1755 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
1756 if (rss_hf & ETH_RSS_IPV6_EX)
1757 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
1758 if (rss_hf & ETH_RSS_IPV6_TCP)
1759 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
1760 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1761 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
1762 if (rss_hf & ETH_RSS_IPV4_UDP)
1763 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
1764 if (rss_hf & ETH_RSS_IPV6_UDP)
1765 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
1766 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1767 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
1768 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
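/*
 * Application-side configuration that exercises this path, sketched for illustration
 * only (the mq_mode value is assumed here; field values are placeholders chosen
 * before rte_eth_dev_configure() is called):
 *
 *	struct rte_eth_conf port_conf;
 *	memset(&port_conf, 0, sizeof(port_conf));
 *	port_conf.rxmode.mq_mode = ETH_RSS;
 *	port_conf.rx_adv_conf.rss_conf.rss_key = NULL;	(NULL selects rss_intel_key)
 *	port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP;
 */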
1771 #define NUM_VFTA_REGISTERS 128
1772 #define NIC_RX_BUFFER_SIZE 0x200
1775 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
1777 struct rte_eth_vmdq_dcb_conf *cfg;
1778 struct ixgbe_hw *hw;
1779 enum rte_eth_nb_pools num_pools;
1780 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
1782 uint8_t nb_tcs; /* number of traffic classes */
1785 PMD_INIT_FUNC_TRACE();
1786 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1787 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
1788 num_pools = cfg->nb_queue_pools;
1789 /* Check we have a valid number of pools */
1790 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
1791 ixgbe_rss_disable(dev);
1794 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
1795 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
1799 * split rx buffer up into sections, each for 1 traffic class
1801 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
1802 for (i = 0 ; i < nb_tcs; i++) {
1803 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
1804 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
1805 /* clear 10 bits. */
1806 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
1807 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
1809 /* zero alloc all unused TCs */
1810 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
1811 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
1812 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
1813 /* clear 10 bits. */
1814 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
1817 /* MRQC: enable vmdq and dcb */
1818 mrqc = (num_pools == ETH_16_POOLS) ?
1819 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
1820 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
1822 /* PFVTCTL: turn on virtualisation and set the default pool */
1823 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
1824 if (cfg->enable_default_pool) {
1825 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
1827 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
1829 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
1831 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
1833 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
1835 * mapping is done with 3 bits per priority,
1836 * so shift by i*3 each time
1838 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
1840 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
1842 /* RTRPCS: DCB related */
1843 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
1845 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
1846 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
1847 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
1848 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
1850 /* VFTA - enable all vlan filters */
1851 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
1852 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
1855 /* VFRE: pool enabling for receive - 16 or 32 */
1856 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
1857 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
1860 * MPSAR - allow pools to read specific mac addresses
1861 * In this case, all pools should be able to read from mac addr 0
1863 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
1864 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
1866 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
1867 for (i = 0; i < cfg->nb_pool_maps; i++) {
1868 /* set vlan id in VF register and set the valid bit */
1869 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
1870 (cfg->pool_map[i].vlan_id & 0xFFF)));
1872 * Put the allowed pools in VFB reg. As we only have 16 or 32
1873 * pools, we only need to use the first half of the register
1876 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
1881 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
1883 struct igb_rx_entry *rxe = rxq->sw_ring;
1887 /* Initialize software ring entries */
1888 for (i = 0; i < rxq->nb_rx_desc; i++) {
1889 volatile union ixgbe_adv_rx_desc *rxd;
1890 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
1892 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
1893 (unsigned) rxq->queue_id);
1897 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
1898 rxd = &rxq->rx_ring[i];
1899 rxd->read.hdr_addr = dma_addr;
1900 rxd->read.pkt_addr = dma_addr;
1908 * Initializes Receive Unit.
1911 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
1913 struct ixgbe_hw *hw;
1914 struct igb_rx_queue *rxq;
1915 struct rte_pktmbuf_pool_private *mbp_priv;
1928 PMD_INIT_FUNC_TRACE();
1929 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1932 * Make sure receives are disabled while setting
1933 * up the RX context (registers, descriptor rings, etc.).
1935 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1936 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
1938 /* Enable receipt of broadcast frames */
1939 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
1940 fctrl |= IXGBE_FCTRL_BAM;
1941 fctrl |= IXGBE_FCTRL_DPF;
1942 fctrl |= IXGBE_FCTRL_PMCF;
1943 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
1946 * Configure CRC stripping, if any.
1948 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
1949 if (dev->data->dev_conf.rxmode.hw_strip_crc)
1950 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
1952 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
1955 * Configure jumbo frame support, if any.
1957 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
1958 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
1959 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
1960 maxfrs &= 0x0000FFFF;
1961 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
1962 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
1964 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
1966 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
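/*
 * Illustrative application-side settings that drive the jumbo-frame branch above
 * (values are placeholders, set in rte_eth_conf before rte_eth_dev_configure()):
 *
 *	port_conf.rxmode.jumbo_frame = 1;
 *	port_conf.rxmode.max_rx_pkt_len = 9000;	(written to MAXFRS above)
 */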
1968 /* Setup RX queues */
1969 dev->rx_pkt_burst = ixgbe_recv_pkts;
1970 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1971 rxq = dev->data->rx_queues[i];
1973 /* Allocate buffers for descriptor rings */
1974 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
1976 ixgbe_dev_clear_queues(dev);
1981 * Reset crc_len in case it was changed after queue setup by a
1982 * call to configure.
1984 rxq->crc_len = (uint8_t)
1985 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1988 /* Setup the Base and Length of the Rx Descriptor Rings */
1989 bus_addr = rxq->rx_ring_phys_addr;
1990 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
1991 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
1992 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
1993 (uint32_t)(bus_addr >> 32));
1994 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
1995 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
1996 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
1997 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
1999 /* Configure the SRRCTL register */
2000 #ifdef RTE_HEADER_SPLIT_ENABLE
2002 * Configure Header Split
2004 if (dev->data->dev_conf.rxmode.header_split) {
2005 if (hw->mac.type == ixgbe_mac_82599EB) {
2006 /* Must setup the PSRTYPE register */
2008 psrtype = IXGBE_PSRTYPE_TCPHDR |
2009 IXGBE_PSRTYPE_UDPHDR |
2010 IXGBE_PSRTYPE_IPV4HDR |
2011 IXGBE_PSRTYPE_IPV6HDR;
2012 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
2014 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
2015 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
2016 IXGBE_SRRCTL_BSIZEHDR_MASK);
2017 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2020 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2023 * Configure the RX buffer size in the BSIZEPACKET field of
2024 * the SRRCTL register of the queue.
2025 * The value is in 1 KB resolution. Valid values can be from
2028 mbp_priv = (struct rte_pktmbuf_pool_private *)
2029 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
2030 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
2031 RTE_PKTMBUF_HEADROOM);
2032 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
2033 IXGBE_SRRCTL_BSIZEPKT_MASK);
2034 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2036 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
2037 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
2038 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
2039 dev->data->scattered_rx = 1;
2040 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
2045 * Configure RSS if device configured with multiple RX queues.
2047 if (hw->mac.type == ixgbe_mac_82599EB) {
2048 if (dev->data->nb_rx_queues > 1)
2049 switch (dev->data->dev_conf.rxmode.mq_mode) {
2050 case ETH_RSS:
2051 ixgbe_rss_configure(dev);
2052 break;
2054 case ETH_VMDQ_DCB:
2055 ixgbe_vmdq_dcb_configure(dev);
2056 break;
2058 default:
2059 ixgbe_rss_disable(dev);
2060 }
2061 else
2062 ixgbe_rss_disable(dev);
2063 }
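/*
 * Multi-queue RX is only programmed here for 82599: ETH_RSS fills in the
 * RSS hash key and redirection table, ETH_VMDQ_DCB sets up the VMDq+DCB
 * pools, and anything else (or a single RX queue) leaves RSS disabled so
 * no hash is computed or reported in the descriptors.
 */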
2064 /*
2065 * Setup the Checksum Register.
2066 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
2067 * Enable IP/L4 checksum computation by hardware if requested to do so.
2068 */
2069 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2070 rxcsum |= IXGBE_RXCSUM_PCSD;
2071 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2072 rxcsum |= IXGBE_RXCSUM_IPPCSE;
2073 else
2074 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
2076 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
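/*
 * PCSD turns off the legacy full-packet checksum, which shares the
 * descriptor field used for the RSS hash and is therefore mutually
 * exclusive with it; IPPCSE only controls the IP payload checksum
 * calculation behind the hw_ip_checksum offload request.
 */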
2078 if (hw->mac.type == ixgbe_mac_82599EB) {
2079 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2080 if (dev->data->dev_conf.rxmode.hw_strip_crc)
2081 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2082 else
2083 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
2084 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2085 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2086 }
2088 return 0;
2089 }
2091 /*
2092 * Initializes Transmit Unit.
2093 */
2094 void
2095 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
2096 {
2097 struct ixgbe_hw *hw;
2098 struct igb_tx_queue *txq;
2099 uint64_t bus_addr;
2100 uint32_t hlreg0;
2101 uint32_t txctrl;
2102 uint32_t rttdcs;
2103 uint16_t i;
2105 PMD_INIT_FUNC_TRACE();
2106 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2108 /* Enable TX CRC (checksum offload requirement) */
2109 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2110 hlreg0 |= IXGBE_HLREG0_TXCRCEN;
2111 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
2113 /* Setup the Base and Length of the Tx Descriptor Rings */
2114 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2115 txq = dev->data->tx_queues[i];
2117 bus_addr = txq->tx_ring_phys_addr;
2118 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
2119 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2120 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
2121 (uint32_t)(bus_addr >> 32));
2122 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2123 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
2124 /* Setup the HW Tx Head and TX Tail descriptor pointers */
2125 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2126 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
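/*
 * TX ring programming mirrors the RX side: 16-byte advanced TX
 * descriptors, a 64-bit base split across TDBAL/TDBAH, the length in
 * bytes in TDLEN, and head/tail zeroed so the ring starts out empty.
 */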
2128 /*
2129 * Disable Tx Head Writeback RO bit, since this hoses
2130 * bookkeeping if things aren't delivered in order.
2131 */
2132 switch (hw->mac.type) {
2133 case ixgbe_mac_82598EB:
2134 txctrl = IXGBE_READ_REG(hw,
2135 IXGBE_DCA_TXCTRL(i));
2136 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2137 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i),
2138 txctrl);
2139 break;
2141 case ixgbe_mac_82599EB:
2142 case ixgbe_mac_X540:
2143 default:
2144 txctrl = IXGBE_READ_REG(hw,
2145 IXGBE_DCA_TXCTRL_82599(i));
2146 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2147 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
2148 txctrl);
2149 break;
2150 }
2151 }
2153 if (hw->mac.type != ixgbe_mac_82598EB) {
2154 /* disable arbiter before setting MTQC */
2155 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2156 rttdcs |= IXGBE_RTTDCS_ARBDIS;
2157 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2159 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
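/*
 * MTQC is only supposed to be reprogrammed while the TX descriptor
 * arbiter is halted, hence the RTTDCS.ARBDIS toggle around it; the
 * 64Q_1PB value selects the plain 64-queue, single packet-buffer layout
 * (no DCB, no virtualization).
 */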
2161 /* re-enable arbiter */
2162 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2163 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2164 }
2165 }
2167 /*
2168 * Start Transmit and Receive Units.
2169 */
2170 void
2171 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
2172 {
2173 struct ixgbe_hw *hw;
2174 struct igb_tx_queue *txq;
2175 struct igb_rx_queue *rxq;
2176 uint32_t txdctl;
2177 uint32_t dmatxctl;
2178 uint32_t rxctrl;
2179 uint32_t rxdctl;
2180 uint16_t i;
2181 int poll_ms;
2183 PMD_INIT_FUNC_TRACE();
2184 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2186 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2187 txq = dev->data->tx_queues[i];
2188 /* Setup Transmit Threshold Registers */
2189 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2190 txdctl |= txq->pthresh & 0x7F;
2191 txdctl |= ((txq->hthresh & 0x7F) << 8);
2192 txdctl |= ((txq->wthresh & 0x7F) << 16);
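/*
 * The low bytes of TXDCTL hold the descriptor fetch/write-back
 * thresholds: PTHRESH in bits 6:0, HTHRESH in bits 14:8 and WTHRESH in
 * bits 22:16, which is what the 0x7F masks and shifts above encode.
 */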
2193 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
2194 }
2196 if (hw->mac.type != ixgbe_mac_82598EB) {
2197 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2198 dmatxctl |= IXGBE_DMATXCTL_TE;
2199 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2200 }
2202 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2203 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2204 txdctl |= IXGBE_TXDCTL_ENABLE;
2205 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
2207 /* Wait until TX Enable ready */
2208 if (hw->mac.type == ixgbe_mac_82599EB) {
2209 poll_ms = 10;
2210 do {
2211 rte_delay_ms(1);
2212 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
2213 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
2214 if (!poll_ms)
2215 PMD_INIT_LOG(ERR, "Could not enable "
2216 "Tx Queue %d\n", i);
2219 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2220 rxq = dev->data->rx_queues[i];
2221 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2222 rxdctl |= IXGBE_RXDCTL_ENABLE;
2223 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
2225 /* Wait until RX Enable ready */
2226 poll_ms = 10;
2227 do {
2228 rte_delay_ms(1);
2229 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2230 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
2231 if (!poll_ms)
2232 PMD_INIT_LOG(ERR, "Could not enable "
2233 "Rx Queue %d\n", i);
2235 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
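/*
 * The write barrier above makes sure the descriptors populated by
 * ixgbe_alloc_rx_queue_mbufs() are visible before the tail is advanced;
 * writing RDT = nb_rx_desc - 1 then hands all but one descriptor slot
 * to the hardware.
 */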
2238 /* Enable Receive engine */
2239 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2240 if (hw->mac.type == ixgbe_mac_82598EB)
2241 rxctrl |= IXGBE_RXCTRL_DMBYPS;
2242 rxctrl |= IXGBE_RXCTRL_RXEN;
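/*
 * RXEN is not written to RXCTRL directly: enable_rx_dma is a MAC-level
 * callback because 82599 has to quiesce its security block around
 * enabling receive DMA, while 82598 additionally needs the descriptor
 * monitor bypass (DMBYPS) set above.
 */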
2243 hw->mac.ops.enable_rx_dma(hw, rxctrl);
2244 }
2247 /*
2248 * [VF] Initializes Receive Unit.
2249 */
2250 int
2251 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
2252 {
2253 struct ixgbe_hw *hw;
2254 struct igb_rx_queue *rxq;
2255 struct rte_pktmbuf_pool_private *mbp_priv;
2256 uint64_t bus_addr;
2257 uint32_t srrctl;
2258 uint16_t buf_size;
2259 uint16_t i;
2260 int ret;
2262 PMD_INIT_FUNC_TRACE();
2263 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2265 /* Setup RX queues */
2266 dev->rx_pkt_burst = ixgbe_recv_pkts;
2267 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2268 rxq = dev->data->rx_queues[i];
2270 /* Allocate buffers for descriptor rings */
2271 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
2272 if (ret)
2273 return ret;
2275 /* Setup the Base and Length of the Rx Descriptor Rings */
2276 bus_addr = rxq->rx_ring_phys_addr;
2278 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
2279 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2280 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
2281 (uint32_t)(bus_addr >> 32));
2282 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
2283 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
2284 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
2285 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
2288 /* Configure the SRRCTL register */
2289 #ifdef RTE_HEADER_SPLIT_ENABLE
2290 /*
2291 * Configure Header Split
2292 */
2293 if (dev->data->dev_conf.rxmode.header_split) {
2295 /* Must setup the PSRTYPE register */
2297 psrtype = IXGBE_PSRTYPE_TCPHDR |
2298 IXGBE_PSRTYPE_UDPHDR |
2299 IXGBE_PSRTYPE_IPV4HDR |
2300 IXGBE_PSRTYPE_IPV6HDR;
2302 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);
2304 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
2305 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
2306 IXGBE_SRRCTL_BSIZEHDR_MASK);
2307 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2308 } else
2309 #endif
2310 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2312 /*
2313 * Configure the RX buffer size in the BSIZEPACKET field of
2314 * the SRRCTL register of the queue.
2315 * The value is in 1 KB resolution. Valid values can be from
2316 * 1 KB to 16 KB.
2317 */
2318 mbp_priv = (struct rte_pktmbuf_pool_private *)
2319 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
2320 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
2321 RTE_PKTMBUF_HEADROOM);
2322 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
2323 IXGBE_SRRCTL_BSIZEPKT_MASK);
2325 /*
2326 * VF modification to write virtual function SRRCTL register
2327 */
2328 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
2330 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
2331 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
2332 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
2333 dev->data->scattered_rx = 1;
2334 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
2335 }
2336 }
2337 return 0;
2338 }
2340 /*
2341 * [VF] Initializes Transmit Unit.
2342 */
2343 void
2344 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
2345 {
2346 struct ixgbe_hw *hw;
2347 struct igb_tx_queue *txq;
2348 uint64_t bus_addr;
2349 uint32_t txctrl;
2350 uint16_t i;
2352 PMD_INIT_FUNC_TRACE();
2353 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2355 /* Setup the Base and Length of the Tx Descriptor Rings */
2356 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2357 txq = dev->data->tx_queues[i];
2358 bus_addr = txq->tx_ring_phys_addr;
2359 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
2360 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
2361 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
2362 (uint32_t)(bus_addr >> 32));
2363 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
2364 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
2365 /* Setup the HW Tx Head and TX Tail descriptor pointers */
2366 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
2367 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
2369 /*
2370 * Disable Tx Head Writeback RO bit, since this hoses
2371 * bookkeeping if things aren't delivered in order.
2372 */
2373 txctrl = IXGBE_READ_REG(hw,
2374 IXGBE_VFDCA_TXCTRL(i));
2375 txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
2376 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
2377 txctrl);
2378 }
2379 }
2381 /*
2382 * [VF] Start Transmit and Receive Units.
2383 */
2384 void
2385 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
2386 {
2387 struct ixgbe_hw *hw;
2388 struct igb_tx_queue *txq;
2389 struct igb_rx_queue *rxq;
2390 uint32_t txdctl;
2391 uint32_t rxdctl;
2392 uint16_t i;
2393 int poll_ms;
2395 PMD_INIT_FUNC_TRACE();
2396 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2398 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2399 txq = dev->data->tx_queues[i];
2400 /* Setup Transmit Threshold Registers */
2401 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2402 txdctl |= txq->pthresh & 0x7F;
2403 txdctl |= ((txq->hthresh & 0x7F) << 8);
2404 txdctl |= ((txq->wthresh & 0x7F) << 16);
2405 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
2406 }
2408 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2410 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2411 txdctl |= IXGBE_TXDCTL_ENABLE;
2412 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
2415 /* Wait until TX Enable ready */
2416 poll_ms = 10;
2417 do {
2418 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
2419 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
2420 if (!poll_ms)
2421 PMD_INIT_LOG(ERR, "Could not enable "
2422 "Tx Queue %d\n", i);
2424 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2426 rxq = dev->data->rx_queues[i];
2428 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
2429 rxdctl |= IXGBE_RXDCTL_ENABLE;
2430 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
2432 /* Wait until RX Enable ready */
2433 poll_ms = 10;
2434 do {
2435 rte_delay_ms(1);
2436 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
2437 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
2438 if (!poll_ms)
2439 PMD_INIT_LOG(ERR, "Could not enable "
2440 "Rx Queue %d\n", i);
2442 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);