4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
47 #include <rte_byteorder.h>
48 #include <rte_common.h>
49 #include <rte_cycles.h>
51 #include <rte_debug.h>
52 #include <rte_interrupts.h>
54 #include <rte_memory.h>
55 #include <rte_memzone.h>
56 #include <rte_launch.h>
57 #include <rte_tailq.h>
59 #include <rte_per_lcore.h>
60 #include <rte_lcore.h>
61 #include <rte_atomic.h>
62 #include <rte_branch_prediction.h>
64 #include <rte_mempool.h>
65 #include <rte_malloc.h>
67 #include <rte_ether.h>
68 #include <rte_ethdev.h>
69 #include <rte_prefetch.h>
73 #include <rte_string_fns.h>
74 #include <rte_errno.h>
76 #include "ixgbe_logs.h"
77 #include "ixgbe/ixgbe_api.h"
78 #include "ixgbe/ixgbe_vf.h"
79 #include "ixgbe_ethdev.h"
81 static inline struct rte_mbuf *
82 rte_rxmbuf_alloc(struct rte_mempool *mp)
86 m = __rte_mbuf_raw_alloc(mp);
87 __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
91 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
92 (uint64_t) ((mb)->buf_physaddr + (uint64_t)((char *)((mb)->pkt.data) - \
93 (char *)(mb)->buf_addr))
95 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
96 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
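/*
 * Worked example (illustrative only): for an mbuf whose buf_physaddr is
 * 0x100000 and whose pkt.data pointer sits RTE_PKTMBUF_HEADROOM bytes past
 * buf_addr (128 with the default build configuration), both macros evaluate
 * to 0x100080, i.e. the physical address of the first byte of packet data
 * handed to the NIC. RTE_MBUF_DATA_DMA_ADDR_DEFAULT() assumes the data
 * offset is exactly the default headroom, which holds for freshly allocated
 * RX mbufs.
 */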
99 * Structure associated with each descriptor of the RX ring of a RX queue.
101 struct igb_rx_entry {
102 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
106 * Structure associated with each descriptor of the TX ring of a TX queue.
108 struct igb_tx_entry {
109 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
110 uint16_t next_id; /**< Index of next descriptor in ring. */
111 uint16_t last_id; /**< Index of last scattered descriptor. */
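/*
 * Note: next_id links the software ring entries into a circular list that
 * mirrors the hardware ring, while last_id records the last descriptor of a
 * packet so that the transmit cleanup code can tell, from a single DD bit,
 * that the whole chain of descriptors for that packet has completed.
 */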
115 * Structure associated with each RX queue.
117 struct igb_rx_queue {
118 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
119 volatile union ixgbe_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
120 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
121 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
122 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
123 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
124 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
125 uint16_t nb_rx_desc; /**< number of RX descriptors. */
126 uint16_t rx_tail; /**< current value of RDT register. */
127 uint16_t nb_rx_hold; /**< number of held free RX desc. */
128 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
129 uint16_t queue_id; /**< RX queue index. */
130 uint8_t port_id; /**< Device port identifier. */
131 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
135 * IXGBE CTX Constants
137 enum ixgbe_advctx_num {
138 IXGBE_CTX_0 = 0, /**< CTX0 */
139 IXGBE_CTX_1 = 1, /**< CTX1 */
140 IXGBE_CTX_NUM = 2, /**< CTX NUMBER */
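/*
 * The ixgbe TX path provides two hardware offload contexts (CTX0/CTX1).
 * A context is programmed once with a context descriptor and then referenced
 * by index from subsequent data descriptors, so the driver caches the last
 * contents of both contexts to avoid emitting a new context descriptor for
 * every packet that reuses the same offload parameters.
 */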
* Structure used to check whether a new context descriptor needs to be built.
146 struct ixgbe_advctx_info {
147 uint16_t flags; /**< ol_flags for context build. */
148 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
149 uint32_t vlan_macip_lens; /**< vlan, mac ip length. */
153 * Structure associated with each TX queue.
155 struct igb_tx_queue {
156 /** TX ring virtual address. */
157 volatile union ixgbe_adv_tx_desc *tx_ring;
158 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
159 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
160 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
161 uint16_t nb_tx_desc; /**< number of TX descriptors. */
162 uint16_t tx_tail; /**< current value of TDT reg. */
163 uint16_t tx_free_thresh;/**< minimum TX before freeing. */
164 /** Number of TX descriptors to use before RS bit is set. */
165 uint16_t tx_rs_thresh;
166 /** Number of TX descriptors used since RS bit was set. */
168 /** Index to last TX descriptor to have been cleaned. */
169 uint16_t last_desc_cleaned;
170 /** Total number of TX descriptors ready to be allocated. */
172 uint16_t queue_id; /**< TX queue index. */
173 uint8_t port_id; /**< Device port identifier. */
174 uint8_t pthresh; /**< Prefetch threshold register. */
175 uint8_t hthresh; /**< Host threshold register. */
176 uint8_t wthresh; /**< Write-back threshold reg. */
177 uint32_t ctx_curr; /**< Hardware context states. */
178 /** Hardware context0 history. */
179 struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
184 #define RTE_PMD_USE_PREFETCH
187 #ifdef RTE_PMD_USE_PREFETCH
189 * Prefetch a cache line into all cache levels.
191 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
193 #define rte_ixgbe_prefetch(p) do {} while(0)
196 #ifdef RTE_PMD_PACKET_PREFETCH
197 #define rte_packet_prefetch(p) rte_prefetch1(p)
199 #define rte_packet_prefetch(p) do {} while(0)
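/*
 * rte_ixgbe_prefetch() is applied to descriptor ring and sw_ring entries,
 * while rte_packet_prefetch() targets packet data; both collapse to empty
 * statements when the corresponding RTE_PMD_*PREFETCH options are not
 * defined, so the hot paths below can call them unconditionally.
 */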
202 /*********************************************************************
206 **********************************************************************/
208 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
209 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
210 uint16_t ol_flags, uint32_t vlan_macip_lens)
212 uint32_t type_tucmd_mlhl;
213 uint32_t mss_l4len_idx;
217 ctx_idx = txq->ctx_curr;
221 if (ol_flags & PKT_TX_VLAN_PKT) {
222 cmp_mask |= TX_VLAN_CMP_MASK;
225 if (ol_flags & PKT_TX_IP_CKSUM) {
226 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
227 cmp_mask |= TX_MAC_LEN_CMP_MASK;
230 /* Specify which HW CTX to upload. */
231 mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
232 switch (ol_flags & PKT_TX_L4_MASK) {
233 case PKT_TX_UDP_CKSUM:
234 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
235 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
236 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
237 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
239 case PKT_TX_TCP_CKSUM:
240 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
241 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
242 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
243 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
245 case PKT_TX_SCTP_CKSUM:
246 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
247 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
248 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
249 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
252 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
253 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
257 txq->ctx_cache[ctx_idx].flags = ol_flags;
258 txq->ctx_cache[ctx_idx].cmp_mask = cmp_mask;
259 txq->ctx_cache[ctx_idx].vlan_macip_lens = vlan_macip_lens & cmp_mask;
261 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
262 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
263 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
264 ctx_txd->seqnum_seed = 0;
268 * Check which hardware context can be used. Use the existing match
269 * or create a new context descriptor.
271 static inline uint32_t
272 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
273 uint32_t vlan_macip_lens)
275 /* If match with the current used context */
276 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
277 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
278 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
279 return txq->ctx_curr;
/* Check whether the other cached context matches */
284 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
285 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
286 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
287 return txq->ctx_curr;
/* Mismatch: a new context descriptor will have to be built */
291 return (IXGBE_CTX_NUM);
294 static inline uint32_t
295 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
297 static const uint32_t l4_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_TXSM};
298 static const uint32_t l3_olinfo[2] = {0, IXGBE_ADVTXD_POPTS_IXSM};
301 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
302 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
306 static inline uint32_t
307 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
309 static const uint32_t vlan_cmd[2] = {0, IXGBE_ADVTXD_DCMD_VLE};
310 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
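/*
 * The two helpers above translate offload flags into descriptor fields with
 * small constant lookup tables indexed by a boolean expression rather than
 * with conditional branches, keeping the per-packet flag translation
 * branch-free on the transmit fast path.
 */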
313 /* Default RS bit threshold values */
314 #ifndef DEFAULT_TX_RS_THRESH
315 #define DEFAULT_TX_RS_THRESH 32
317 #ifndef DEFAULT_TX_FREE_THRESH
318 #define DEFAULT_TX_FREE_THRESH 32
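/*
 * Rough interplay of the two thresholds above: the RS (Report Status) bit is
 * requested roughly every tx_rs_thresh descriptors, and ixgbe_xmit_cleanup()
 * is attempted once more than tx_free_thresh descriptors are in use, freeing
 * descriptors back to the ring in tx_rs_thresh-sized batches.
 */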
321 /* Reset transmit descriptors after they have been used */
323 ixgbe_xmit_cleanup(struct igb_tx_queue *txq)
325 struct igb_tx_entry *sw_ring = txq->sw_ring;
326 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
327 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
328 uint16_t nb_tx_desc = txq->nb_tx_desc;
329 uint16_t desc_to_clean_to;
330 uint16_t nb_tx_to_clean;
332 /* Determine the last descriptor needing to be cleaned */
333 desc_to_clean_to = last_desc_cleaned + txq->tx_rs_thresh;
334 if (desc_to_clean_to >= nb_tx_desc)
335 desc_to_clean_to = desc_to_clean_to - nb_tx_desc;
337 /* Check to make sure the last descriptor to clean is done */
338 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
339 if (! (txr[desc_to_clean_to].wb.status & IXGBE_TXD_STAT_DD))
341 PMD_TX_FREE_LOG(DEBUG,
342 "TX descriptor %4u is not done"
343 "(port=%d queue=%d)",
345 txq->port_id, txq->queue_id);
346 /* Failed to clean any descriptors, better luck next time */
350 /* Figure out how many descriptors will be cleaned */
351 if (last_desc_cleaned > desc_to_clean_to)
352 nb_tx_to_clean = ((nb_tx_desc - last_desc_cleaned) +
355 nb_tx_to_clean = desc_to_clean_to - last_desc_cleaned;
357 PMD_TX_FREE_LOG(DEBUG,
358 "Cleaning %4u TX descriptors: %4u to %4u "
359 "(port=%d queue=%d)",
360 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
361 txq->port_id, txq->queue_id);
364 * The last descriptor to clean is done, so that means all the
365 * descriptors from the last descriptor that was cleaned
366 * up to the last descriptor with the RS bit set
367 * are done. Only reset the threshold descriptor.
369 txr[desc_to_clean_to].wb.status = 0;
371 /* Update the txq to reflect the last descriptor that was cleaned */
372 txq->last_desc_cleaned = desc_to_clean_to;
373 txq->nb_tx_free += nb_tx_to_clean;
380 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
383 struct igb_tx_queue *txq;
384 struct igb_tx_entry *sw_ring;
385 struct igb_tx_entry *txe, *txn;
386 volatile union ixgbe_adv_tx_desc *txr;
387 volatile union ixgbe_adv_tx_desc *txd;
388 struct rte_mbuf *tx_pkt;
389 struct rte_mbuf *m_seg;
390 uint64_t buf_dma_addr;
391 uint32_t olinfo_status;
392 uint32_t cmd_type_len;
401 uint32_t vlan_macip_lens;
406 sw_ring = txq->sw_ring;
408 tx_id = txq->tx_tail;
409 txe = &sw_ring[tx_id];
411 /* Determine if the descriptor ring needs to be cleaned. */
412 if ((txq->nb_tx_desc - txq->nb_tx_free) > txq->tx_free_thresh) {
413 ixgbe_xmit_cleanup(txq);
417 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
420 pkt_len = tx_pkt->pkt.pkt_len;
422 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
425 * Determine how many (if any) context descriptors
426 * are needed for offload functionality.
428 ol_flags = tx_pkt->ol_flags;
429 vlan_macip_lens = tx_pkt->pkt.vlan_tci << 16 |
430 tx_pkt->pkt.l2_len << IXGBE_ADVTXD_MACLEN_SHIFT |
433 /* If hardware offload required */
434 tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
/* Check whether a new context descriptor must be built or an existing one can be reused. */
437 ctx = what_advctx_update(txq, tx_ol_req, vlan_macip_lens);
/* Only allocate a context descriptor if required */
439 new_ctx = (ctx == IXGBE_CTX_NUM);
* Keep track of how many descriptors are used in this loop.
* This will always be the number of segments plus the number of
* context descriptors required to transmit the packet.
448 nb_used = tx_pkt->pkt.nb_segs + new_ctx;
451 * The number of descriptors that must be allocated for a
452 * packet is the number of segments of that packet, plus 1
453 * Context Descriptor for the hardware offload, if any.
454 * Determine the last TX descriptor to allocate in the TX ring
455 * for the packet, starting from the current position (tx_id)
458 tx_last = (uint16_t) (tx_id + nb_used - 1);
461 if (tx_last >= txq->nb_tx_desc)
462 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
464 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
465 " tx_first=%u tx_last=%u\n",
466 (unsigned) txq->port_id,
467 (unsigned) txq->queue_id,
473 * Make sure there are enough TX descriptors available to
474 * transmit the entire packet.
475 * nb_used better be less than or equal to txq->tx_rs_thresh
477 if (nb_used > txq->nb_tx_free) {
478 PMD_TX_FREE_LOG(DEBUG,
479 "Not enough free TX descriptors "
480 "nb_used=%4u nb_free=%4u "
481 "(port=%d queue=%d)",
482 nb_used, txq->nb_tx_free,
483 txq->port_id, txq->queue_id);
485 if (ixgbe_xmit_cleanup(txq) != 0) {
486 /* Could not clean any descriptors */
492 /* nb_used better be <= txq->tx_rs_thresh */
493 if (unlikely(nb_used > txq->tx_rs_thresh)) {
494 PMD_TX_FREE_LOG(DEBUG,
495 "The number of descriptors needed to "
496 "transmit the packet exceeds the "
497 "RS bit threshold. This will impact "
499 "nb_used=%4u nb_free=%4u "
501 "(port=%d queue=%d)",
502 nb_used, txq->nb_tx_free,
504 txq->port_id, txq->queue_id);
506 * Loop here until there are enough TX
507 * descriptors or until the ring cannot be
510 while (nb_used > txq->nb_tx_free) {
511 if (ixgbe_xmit_cleanup(txq) != 0) {
513 * Could not clean any
525 * By now there are enough free TX descriptors to transmit
530 * Set common flags of all TX Data Descriptors.
532 * The following bits must be set in all Data Descriptors:
533 * - IXGBE_ADVTXD_DTYP_DATA
534 * - IXGBE_ADVTXD_DCMD_DEXT
536 * The following bits must be set in the first Data Descriptor
537 * and are ignored in the other ones:
538 * - IXGBE_ADVTXD_DCMD_IFCS
539 * - IXGBE_ADVTXD_MAC_1588
540 * - IXGBE_ADVTXD_DCMD_VLE
542 * The following bits must only be set in the last Data
544 * - IXGBE_TXD_CMD_EOP
546 * The following bits can be set in any Data Descriptor, but
547 * are only set in the last Data Descriptor:
550 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
551 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
552 olinfo_status = (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
553 #ifdef RTE_LIBRTE_IEEE1588
554 if (ol_flags & PKT_TX_IEEE1588_TMST)
555 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
560 * Setup the TX Advanced Context Descriptor if required
563 volatile struct ixgbe_adv_tx_context_desc *
566 ctx_txd = (volatile struct
567 ixgbe_adv_tx_context_desc *)
570 txn = &sw_ring[txe->next_id];
571 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
573 if (txe->mbuf != NULL) {
574 rte_pktmbuf_free_seg(txe->mbuf);
578 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
581 txe->last_id = tx_last;
582 tx_id = txe->next_id;
* Set up the TX Advanced Data Descriptor.
* This path is taken whether a new context descriptor was
* built or an existing one is being reused.
591 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
592 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
593 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
599 txn = &sw_ring[txe->next_id];
601 if (txe->mbuf != NULL)
602 rte_pktmbuf_free_seg(txe->mbuf);
606 * Set up Transmit Data Descriptor.
608 slen = m_seg->pkt.data_len;
609 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
610 txd->read.buffer_addr =
611 rte_cpu_to_le_64(buf_dma_addr);
612 txd->read.cmd_type_len =
613 rte_cpu_to_le_32(cmd_type_len | slen);
614 txd->read.olinfo_status =
615 rte_cpu_to_le_32(olinfo_status);
616 txe->last_id = tx_last;
617 tx_id = txe->next_id;
619 m_seg = m_seg->pkt.next;
620 } while (m_seg != NULL);
623 * The last packet data descriptor needs End Of Packet (EOP)
625 cmd_type_len |= IXGBE_TXD_CMD_EOP;
626 txq->nb_tx_used += nb_used;
627 txq->nb_tx_free -= nb_used;
629 /* Set RS bit only on threshold packets' last descriptor */
630 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
631 PMD_TX_FREE_LOG(DEBUG,
632 "Setting RS bit on TXD id="
633 "%4u (port=%d queue=%d)",
634 tx_last, txq->port_id, txq->queue_id);
636 cmd_type_len |= IXGBE_TXD_CMD_RS;
638 /* Update txq RS bit counters */
641 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
647 * Set the Transmit Descriptor Tail (TDT)
649 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
650 (unsigned) txq->port_id, (unsigned) txq->queue_id,
651 (unsigned) tx_id, (unsigned) nb_tx);
652 IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
653 txq->tx_tail = tx_id;
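/*
 * Usage sketch (illustrative, not part of the driver): applications normally
 * reach ixgbe_xmit_pkts() through the generic ethdev burst API, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_sent;
 *	... build nb packets in pkts[] ...
 *	nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *
 * rte_eth_tx_burst() calls through the dev->tx_pkt_burst pointer, which
 * ixgbe_dev_tx_queue_setup() below points at ixgbe_xmit_pkts().
 */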
658 /*********************************************************************
662 **********************************************************************/
663 static inline uint16_t
664 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
668 static uint16_t ip_pkt_types_map[16] = {
669 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
670 PKT_RX_IPV6_HDR, 0, 0, 0,
671 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
672 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
675 static uint16_t ip_rss_types_map[16] = {
676 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
677 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
678 PKT_RX_RSS_HASH, 0, 0, 0,
679 0, 0, 0, PKT_RX_FDIR,
682 #ifdef RTE_LIBRTE_IEEE1588
683 static uint32_t ip_pkt_etqf_map[8] = {
684 0, 0, 0, PKT_RX_IEEE1588_PTP,
688 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ?
689 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
690 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
692 pkt_flags = (uint16_t) ((hl_tp_rs & IXGBE_RXDADV_PKTTYPE_ETQF) ? 0 :
693 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
696 return (pkt_flags | ip_rss_types_map[hl_tp_rs & 0xF]);
699 static inline uint16_t
700 rx_desc_status_to_pkt_flags(uint32_t rx_status)
* Check only whether a VLAN tag is present.
* Do not check here whether the L3/L4 RX checksum was computed by the NIC;
* that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
709 pkt_flags = (uint16_t) (rx_status & IXGBE_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
711 #ifdef RTE_LIBRTE_IEEE1588
712 if (rx_status & IXGBE_RXD_STAT_TMST)
713 pkt_flags = (pkt_flags | PKT_RX_IEEE1588_TMST);
718 static inline uint16_t
719 rx_desc_error_to_pkt_flags(uint32_t rx_status)
722 * Bit 31: IPE, IPv4 checksum error
* Bit 30: L4I, L4 integrity error
725 static uint16_t error_to_pkt_flags_map[4] = {
726 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
727 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
729 return error_to_pkt_flags_map[(rx_status >>
730 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
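/*
 * Worked example (assuming IXGBE_RXDADV_ERR_CKSUM_BIT is 30 and the mask is
 * 0x3, as the 4-entry table above implies): a status word with the IPv4
 * checksum error bit (31) set and the L4 integrity error bit (30) clear
 * indexes entry 2 of the table and therefore yields PKT_RX_IP_CKSUM_BAD only.
 */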
734 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
737 struct igb_rx_queue *rxq;
738 volatile union ixgbe_adv_rx_desc *rx_ring;
739 volatile union ixgbe_adv_rx_desc *rxdp;
740 struct igb_rx_entry *sw_ring;
741 struct igb_rx_entry *rxe;
742 struct rte_mbuf *rxm;
743 struct rte_mbuf *nmb;
744 union ixgbe_adv_rx_desc rxd;
747 uint32_t hlen_type_rss;
757 rx_id = rxq->rx_tail;
758 rx_ring = rxq->rx_ring;
759 sw_ring = rxq->sw_ring;
760 while (nb_rx < nb_pkts) {
762 * The order of operations here is important as the DD status
763 * bit must not be read after any other descriptor fields.
764 * rx_ring and rxdp are pointing to volatile data so the order
765 * of accesses cannot be reordered by the compiler. If they were
766 * not volatile, they could be reordered which could lead to
767 * using invalid descriptor fields when read from rxd.
769 rxdp = &rx_ring[rx_id];
770 staterr = rxdp->wb.upper.status_error;
771 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
778 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
779 * is likely to be invalid and to be dropped by the various
780 * validation checks performed by the network stack.
782 * Allocate a new mbuf to replenish the RX ring descriptor.
783 * If the allocation fails:
784 * - arrange for that RX descriptor to be the first one
785 * being parsed the next time the receive function is
786 * invoked [on the same queue].
788 * - Stop parsing the RX ring and return immediately.
* This policy does not drop the packet received in the RX
* descriptor for which the allocation of a new mbuf failed.
* Thus, it allows that packet to be retrieved later, once
* mbufs have been freed in the meantime.
* As a side effect, holding RX descriptors instead of
* systematically giving them back to the NIC may lead to
* RX ring exhaustion.
* However, the NIC can gracefully prevent such a situation
* by sending specific "back-pressure" flow control frames
* to its peer(s).
801 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
802 "ext_err_stat=0x%08x pkt_len=%u\n",
803 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
804 (unsigned) rx_id, (unsigned) staterr,
805 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
807 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
809 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
810 "queue_id=%u\n", (unsigned) rxq->port_id,
811 (unsigned) rxq->queue_id);
812 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
817 rxe = &sw_ring[rx_id];
819 if (rx_id == rxq->nb_rx_desc)
822 /* Prefetch next mbuf while processing current one. */
823 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
826 * When next RX descriptor is on a cache-line boundary,
827 * prefetch the next 4 RX descriptors and the next 8 pointers
830 if ((rx_id & 0x3) == 0) {
831 rte_ixgbe_prefetch(&rx_ring[rx_id]);
832 rte_ixgbe_prefetch(&sw_ring[rx_id]);
838 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
839 rxdp->read.hdr_addr = dma_addr;
840 rxdp->read.pkt_addr = dma_addr;
843 * Initialize the returned mbuf.
844 * 1) setup generic mbuf fields:
845 * - number of segments,
848 * - RX port identifier.
849 * 2) integrate hardware offload data, if any:
851 * - IP checksum flag,
852 * - VLAN TCI, if any,
855 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
857 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
858 rte_packet_prefetch(rxm->pkt.data);
859 rxm->pkt.nb_segs = 1;
860 rxm->pkt.next = NULL;
861 rxm->pkt.pkt_len = pkt_len;
862 rxm->pkt.data_len = pkt_len;
863 rxm->pkt.in_port = rxq->port_id;
865 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
866 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
867 rxm->pkt.vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
869 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
870 pkt_flags = (pkt_flags | rx_desc_status_to_pkt_flags(staterr));
871 pkt_flags = (pkt_flags | rx_desc_error_to_pkt_flags(staterr));
872 rxm->ol_flags = pkt_flags;
874 if (likely(pkt_flags & PKT_RX_RSS_HASH))
875 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
876 else if (pkt_flags & PKT_RX_FDIR) {
877 rxm->pkt.hash.fdir.hash =
878 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
879 & IXGBE_ATR_HASH_MASK);
880 rxm->pkt.hash.fdir.id = rxd.wb.lower.hi_dword.csum_ip.ip_id;
883 * Store the mbuf address into the next entry of the array
884 * of returned packets.
886 rx_pkts[nb_rx++] = rxm;
888 rxq->rx_tail = rx_id;
891 * If the number of free RX descriptors is greater than the RX free
892 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
894 * Update the RDT with the value of the last processed RX descriptor
895 * minus 1, to guarantee that the RDT register is never equal to the
* RDH register, which creates a "full" ring situation from the
897 * hardware point of view...
899 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
900 if (nb_hold > rxq->rx_free_thresh) {
901 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
902 "nb_hold=%u nb_rx=%u\n",
903 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
904 (unsigned) rx_id, (unsigned) nb_hold,
906 rx_id = (uint16_t) ((rx_id == 0) ?
907 (rxq->nb_rx_desc - 1) : (rx_id - 1));
908 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
911 rxq->nb_rx_hold = nb_hold;
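/*
 * Usage sketch (illustrative, not part of the driver): the receive path is
 * normally entered through the generic ethdev API, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * dev->rx_pkt_burst points at ixgbe_recv_pkts() by default and is switched to
 * ixgbe_recv_scattered_pkts() by ixgbe_dev_rx_init() when the maximum RX
 * packet length does not fit in a single mbuf data buffer.
 */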
916 ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
919 struct igb_rx_queue *rxq;
920 volatile union ixgbe_adv_rx_desc *rx_ring;
921 volatile union ixgbe_adv_rx_desc *rxdp;
922 struct igb_rx_entry *sw_ring;
923 struct igb_rx_entry *rxe;
924 struct rte_mbuf *first_seg;
925 struct rte_mbuf *last_seg;
926 struct rte_mbuf *rxm;
927 struct rte_mbuf *nmb;
928 union ixgbe_adv_rx_desc rxd;
929 uint64_t dma; /* Physical address of mbuf data buffer */
931 uint32_t hlen_type_rss;
941 rx_id = rxq->rx_tail;
942 rx_ring = rxq->rx_ring;
943 sw_ring = rxq->sw_ring;
946 * Retrieve RX context of current packet, if any.
948 first_seg = rxq->pkt_first_seg;
949 last_seg = rxq->pkt_last_seg;
951 while (nb_rx < nb_pkts) {
954 * The order of operations here is important as the DD status
955 * bit must not be read after any other descriptor fields.
956 * rx_ring and rxdp are pointing to volatile data so the order
957 * of accesses cannot be reordered by the compiler. If they were
958 * not volatile, they could be reordered which could lead to
959 * using invalid descriptor fields when read from rxd.
961 rxdp = &rx_ring[rx_id];
962 staterr = rxdp->wb.upper.status_error;
963 if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
970 * Allocate a new mbuf to replenish the RX ring descriptor.
971 * If the allocation fails:
972 * - arrange for that RX descriptor to be the first one
973 * being parsed the next time the receive function is
974 * invoked [on the same queue].
976 * - Stop parsing the RX ring and return immediately.
* This policy does not drop the packet received in the RX
* descriptor for which the allocation of a new mbuf failed.
* Thus, it allows that packet to be retrieved later, once
* mbufs have been freed in the meantime.
* As a side effect, holding RX descriptors instead of
* systematically giving them back to the NIC may lead to
* RX ring exhaustion.
* However, the NIC can gracefully prevent such a situation
* by sending specific "back-pressure" flow control frames
* to its peer(s).
PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
990 "staterr=0x%x data_len=%u\n",
991 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
992 (unsigned) rx_id, (unsigned) staterr,
993 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
995 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
997 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
998 "queue_id=%u\n", (unsigned) rxq->port_id,
999 (unsigned) rxq->queue_id);
1000 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1005 rxe = &sw_ring[rx_id];
1007 if (rx_id == rxq->nb_rx_desc)
1010 /* Prefetch next mbuf while processing current one. */
1011 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1014 * When next RX descriptor is on a cache-line boundary,
1015 * prefetch the next 4 RX descriptors and the next 8 pointers
1018 if ((rx_id & 0x3) == 0) {
1019 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1020 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1024 * Update RX descriptor with the physical address of the new
* data buffer of the newly allocated mbuf.
1029 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
1030 rxdp->read.hdr_addr = dma;
1031 rxdp->read.pkt_addr = dma;
1034 * Set data length & data buffer address of mbuf.
1036 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1037 rxm->pkt.data_len = data_len;
1038 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
1041 * If this is the first buffer of the received packet,
1042 * set the pointer to the first mbuf of the packet and
1043 * initialize its context.
1044 * Otherwise, update the total length and the number of segments
1045 * of the current scattered packet, and update the pointer to
1046 * the last mbuf of the current packet.
1048 if (first_seg == NULL) {
1050 first_seg->pkt.pkt_len = data_len;
1051 first_seg->pkt.nb_segs = 1;
1053 first_seg->pkt.pkt_len = (uint16_t)(first_seg->pkt.pkt_len
1055 first_seg->pkt.nb_segs++;
1056 last_seg->pkt.next = rxm;
1060 * If this is not the last buffer of the received packet,
1061 * update the pointer to the last mbuf of the current scattered
1062 * packet and continue to parse the RX ring.
1064 if (! (staterr & IXGBE_RXDADV_STAT_EOP)) {
1070 * This is the last buffer of the received packet.
1071 * If the CRC is not stripped by the hardware:
1072 * - Subtract the CRC length from the total packet length.
1073 * - If the last buffer only contains the whole CRC or a part
* of it, free the mbuf associated with the last buffer.
1075 * If part of the CRC is also contained in the previous
1076 * mbuf, subtract the length of that CRC part from the
1077 * data length of the previous mbuf.
1079 rxm->pkt.next = NULL;
1080 if (unlikely(rxq->crc_len > 0)) {
1081 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
1082 if (data_len <= ETHER_CRC_LEN) {
1083 rte_pktmbuf_free_seg(rxm);
1084 first_seg->pkt.nb_segs--;
1085 last_seg->pkt.data_len = (uint16_t)
1086 (last_seg->pkt.data_len -
1087 (ETHER_CRC_LEN - data_len));
1088 last_seg->pkt.next = NULL;
1091 (uint16_t) (data_len - ETHER_CRC_LEN);
1095 * Initialize the first mbuf of the returned packet:
1096 * - RX port identifier,
1097 * - hardware offload data, if any:
1098 * - RSS flag & hash,
1099 * - IP checksum flag,
1100 * - VLAN TCI, if any,
1103 first_seg->pkt.in_port = rxq->port_id;
1106 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1107 * set in the pkt_flags field.
1109 first_seg->pkt.vlan_tci =
1110 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1111 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1112 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
1113 pkt_flags = (pkt_flags |
1114 rx_desc_status_to_pkt_flags(staterr));
1115 pkt_flags = (pkt_flags |
1116 rx_desc_error_to_pkt_flags(staterr));
1117 first_seg->ol_flags = pkt_flags;
1119 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1120 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
1121 else if (pkt_flags & PKT_RX_FDIR) {
1122 first_seg->pkt.hash.fdir.hash =
1123 (uint16_t)((rxd.wb.lower.hi_dword.csum_ip.csum)
1124 & IXGBE_ATR_HASH_MASK);
1125 first_seg->pkt.hash.fdir.id =
1126 rxd.wb.lower.hi_dword.csum_ip.ip_id;
1129 /* Prefetch data of first segment, if configured to do so. */
1130 rte_packet_prefetch(first_seg->pkt.data);
1133 * Store the mbuf address into the next entry of the array
1134 * of returned packets.
1136 rx_pkts[nb_rx++] = first_seg;
1139 * Setup receipt context for a new packet.
1145 * Record index of the next RX descriptor to probe.
1147 rxq->rx_tail = rx_id;
1150 * Save receive context.
1152 rxq->pkt_first_seg = first_seg;
1153 rxq->pkt_last_seg = last_seg;
1156 * If the number of free RX descriptors is greater than the RX free
1157 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1159 * Update the RDT with the value of the last processed RX descriptor
1160 * minus 1, to guarantee that the RDT register is never equal to the
* RDH register, which creates a "full" ring situation from the
1162 * hardware point of view...
1164 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1165 if (nb_hold > rxq->rx_free_thresh) {
1166 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1167 "nb_hold=%u nb_rx=%u\n",
1168 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1169 (unsigned) rx_id, (unsigned) nb_hold,
1171 rx_id = (uint16_t) ((rx_id == 0) ?
1172 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1173 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1176 rxq->nb_rx_hold = nb_hold;
1180 /*********************************************************************
1182 * Queue management functions
1184 **********************************************************************/
1187 * Rings setup and release.
* TDBA/RDBA need only be aligned on a 16-byte boundary, but TDLEN/RDLEN must be
* a multiple of 128 bytes, so TDBA/RDBA are aligned on a 128-byte boundary instead.
* This also optimizes cache line usage; the hardware supports cache line sizes up to 128 bytes.
1193 #define IXGBE_ALIGN 128
1196 * Maximum number of Ring Descriptors.
1198 * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring
1199 * descriptors should meet the following condition:
1200 * (num_ring_desc * sizeof(rx/tx descriptor)) % 128 == 0
1202 #define IXGBE_MIN_RING_DESC 64
1203 #define IXGBE_MAX_RING_DESC 4096
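/*
 * Worked example: an ixgbe advanced RX/TX descriptor is 16 bytes, so
 * (nb_desc * 16) is a multiple of 128 exactly when nb_desc is a multiple
 * of 8; e.g. 64 descriptors occupy 1 KB and 4096 descriptors occupy 64 KB,
 * both of which satisfy the RDLEN/TDLEN rule checked by the queue setup
 * functions below.
 */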
* Create a memzone for the HW rings. malloc cannot be used because the physical
* address of the ring memory is needed. If the memzone already exists, this function returns a pointer to it.
1210 static const struct rte_memzone *
1211 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1212 uint16_t queue_id, uint32_t ring_size, int socket_id)
1214 char z_name[RTE_MEMZONE_NAMESIZE];
1215 const struct rte_memzone *mz;
1217 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1218 dev->driver->pci_drv.name, ring_name,
1219 dev->data->port_id, queue_id);
1221 mz = rte_memzone_lookup(z_name);
1225 return rte_memzone_reserve_aligned(z_name, (uint64_t) ring_size,
1226 socket_id, 0, IXGBE_ALIGN);
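/*
 * The memzone name combines the PCI driver name, the ring name, the port id
 * and the queue id, so a later call for the same queue finds the existing
 * zone via rte_memzone_lookup() instead of reserving a new one. Memzones are
 * never freed, which is also why the setup functions below reserve space for
 * the maximum ring size up front.
 */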
1230 ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1234 if (txq->sw_ring != NULL) {
1235 for (i = 0; i < txq->nb_tx_desc; i++) {
1236 if (txq->sw_ring[i].mbuf != NULL) {
1237 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1238 txq->sw_ring[i].mbuf = NULL;
1245 ixgbe_tx_queue_release(struct igb_tx_queue *txq)
1248 ixgbe_tx_queue_release_mbufs(txq);
1249 rte_free(txq->sw_ring);
1255 ixgbe_dev_tx_queue_release(void *txq)
1257 ixgbe_tx_queue_release(txq);
1260 /* (Re)set dynamic igb_tx_queue fields to defaults */
1262 ixgbe_reset_tx_queue(struct igb_tx_queue *txq)
1264 struct igb_tx_entry *txe = txq->sw_ring;
1267 /* Zero out HW ring memory */
1268 for (i = 0; i < sizeof(union ixgbe_adv_tx_desc) * txq->nb_tx_desc; i++) {
1269 ((volatile char *)txq->tx_ring)[i] = 0;
1272 /* Initialize SW ring entries */
1273 prev = (uint16_t) (txq->nb_tx_desc - 1);
1274 for (i = 0; i < txq->nb_tx_desc; i++) {
1275 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
1276 txd->wb.status = IXGBE_TXD_STAT_DD;
1279 txe[prev].next_id = i;
1284 txq->nb_tx_used = 0;
* Always leave one descriptor unused to avoid a hardware
* race condition.
1289 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1290 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1292 memset((void*)&txq->ctx_cache, 0,
1293 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
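/*
 * Initializing every descriptor's write-back status to IXGBE_TXD_STAT_DD in
 * the reset above makes the whole ring look "already completed" to
 * ixgbe_xmit_cleanup(), so the first transmissions after a (re)start never
 * stall waiting for descriptors that were never submitted.
 */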
1297 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
1300 unsigned int socket_id,
1301 const struct rte_eth_txconf *tx_conf)
1303 const struct rte_memzone *tz;
1304 struct igb_tx_queue *txq;
1305 struct ixgbe_hw *hw;
1306 uint16_t tx_rs_thresh, tx_free_thresh;
1308 PMD_INIT_FUNC_TRACE();
1309 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1312 * Validate number of transmit descriptors.
* It must not exceed the hardware maximum and must be a multiple of IXGBE_ALIGN.
1316 if (((nb_desc * sizeof(union ixgbe_adv_tx_desc)) % IXGBE_ALIGN) != 0 ||
1317 (nb_desc > IXGBE_MAX_RING_DESC) ||
1318 (nb_desc < IXGBE_MIN_RING_DESC)) {
1323 * The following two parameters control the setting of the RS bit on
1324 * transmit descriptors.
1325 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
1326 * descriptors have been used.
1327 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
1328 * descriptors are used or if the number of descriptors required
1329 * to transmit a packet is greater than the number of free TX
1331 * The following constraints must be satisfied:
1332 * tx_rs_thresh must be greater than 0.
1333 * tx_rs_thresh must be less than the size of the ring minus 2.
1334 * tx_rs_thresh must be less than or equal to tx_free_thresh.
1335 * tx_free_thresh must be greater than 0.
1336 * tx_free_thresh must be less than the size of the ring minus 3.
1337 * One descriptor in the TX ring is used as a sentinel to avoid a
1338 * H/W race condition, hence the maximum threshold constraints.
1339 * When set to zero use default values.
1341 tx_rs_thresh = (tx_conf->tx_rs_thresh) ?
1342 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH;
1343 tx_free_thresh = (tx_conf->tx_free_thresh) ?
1344 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
1345 if (tx_rs_thresh >= (nb_desc - 2)) {
1347 "tx_rs_thresh must be less than the "
1348 "number of TX descriptors minus 2. "
1349 "(tx_rs_thresh=%u port=%d queue=%d)",
1350 tx_rs_thresh, dev->data->port_id, queue_idx);
1353 if (tx_free_thresh >= (nb_desc - 3)) {
1355 "tx_rs_thresh must be less than the "
1356 "tx_free_thresh must be less than the "
1357 "number of TX descriptors minus 3. "
1358 "(tx_free_thresh=%u port=%d queue=%d)",
1359 tx_free_thresh, dev->data->port_id, queue_idx);
1362 if (tx_rs_thresh > tx_free_thresh) {
1364 "tx_rs_thresh must be less than or equal to "
1366 "(tx_free_thresh=%u tx_rs_thresh=%u "
1367 "port=%d queue=%d)",
1368 tx_free_thresh, tx_rs_thresh,
1369 dev->data->port_id, queue_idx);
* If tx_rs_thresh is greater than 1, then TX WTHRESH should be
1375 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
1376 * by the NIC and all descriptors are written back after the NIC
1377 * accumulates WTHRESH descriptors.
1379 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
1381 "TX WTHRESH should be set to 0 if "
1382 "tx_rs_thresh is greater than 1. "
1383 "TX WTHRESH will be set to 0. "
1384 "(tx_rs_thresh=%u port=%d queue=%d)",
1386 dev->data->port_id, queue_idx);
1390 /* Free memory prior to re-allocation if needed... */
1391 if (dev->data->tx_queues[queue_idx] != NULL)
1392 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
1394 /* First allocate the tx queue data structure */
1395 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1401 * Allocate TX ring hardware descriptors. A memzone large enough to
1402 * handle the maximum ring size is allocated in order to allow for
1403 * resizing in later calls to the queue setup function.
1405 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1406 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
1409 ixgbe_tx_queue_release(txq);
1413 txq->nb_tx_desc = nb_desc;
1414 txq->tx_rs_thresh = tx_rs_thresh;
1415 txq->tx_free_thresh = tx_free_thresh;
1416 txq->pthresh = tx_conf->tx_thresh.pthresh;
1417 txq->hthresh = tx_conf->tx_thresh.hthresh;
1418 txq->wthresh = tx_conf->tx_thresh.wthresh;
1419 txq->queue_id = queue_idx;
1420 txq->port_id = dev->data->port_id;
* Use VFTDT instead of TDT when running as a virtual function.
1425 if (hw->mac.type == ixgbe_mac_82599_vf)
1426 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
1428 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(queue_idx));
1430 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1431 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
1433 /* Allocate software ring */
1434 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1435 sizeof(struct igb_tx_entry) * nb_desc,
1437 if (txq->sw_ring == NULL) {
1438 ixgbe_tx_queue_release(txq);
1441 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1442 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1444 ixgbe_reset_tx_queue(txq);
1446 dev->data->tx_queues[queue_idx] = txq;
1448 dev->tx_pkt_burst = ixgbe_xmit_pkts;
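/*
 * Example configuration (illustrative): with nb_desc = 512 and a zeroed
 * rte_eth_txconf, tx_rs_thresh and tx_free_thresh both default to 32, which
 * satisfies every constraint checked above (32 < 510, 32 < 509 and
 * tx_rs_thresh <= tx_free_thresh); a non-zero WTHRESH is only honoured when
 * tx_rs_thresh is 1.
 */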
1454 ixgbe_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1458 if (rxq->sw_ring != NULL) {
1459 for (i = 0; i < rxq->nb_rx_desc; i++) {
1460 if (rxq->sw_ring[i].mbuf != NULL) {
1461 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1462 rxq->sw_ring[i].mbuf = NULL;
1469 ixgbe_rx_queue_release(struct igb_rx_queue *rxq)
1471 ixgbe_rx_queue_release_mbufs(rxq);
1472 rte_free(rxq->sw_ring);
1477 ixgbe_dev_rx_queue_release(void *rxq)
1479 ixgbe_rx_queue_release(rxq);
1483 if (dev->data->rx_queues == NULL) {
1484 dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
1485 sizeof(struct igb_rx_queue *) * nb_queues,
1487 if (dev->data->rx_queues == NULL) {
1488 dev->data->nb_rx_queues = 0;
1493 for (i = nb_queues; i < old_nb_queues; i++)
1494 ixgbe_rx_queue_release(dev->data->rx_queues[i]);
1495 rxq = rte_realloc(dev->data->rx_queues,
1496 sizeof(struct igb_rx_queue *) * nb_queues,
1501 dev->data->rx_queues = rxq;
1502 if (nb_queues > old_nb_queues)
1503 memset(&dev->data->rx_queues[old_nb_queues], 0,
1504 sizeof(struct igb_rx_queue *) *
1505 (nb_queues - old_nb_queues));
1507 dev->data->nb_rx_queues = nb_queues;
1511 /* (Re)set dynamic igb_rx_queue fields to defaults */
1513 ixgbe_reset_rx_queue(struct igb_rx_queue *rxq)
1517 /* Zero out HW ring memory */
1518 for (i = 0; i < rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc); i++) {
1519 ((volatile char *)rxq->rx_ring)[i] = 0;
1523 rxq->nb_rx_hold = 0;
1524 rxq->pkt_first_seg = NULL;
1525 rxq->pkt_last_seg = NULL;
1529 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
1532 unsigned int socket_id,
1533 const struct rte_eth_rxconf *rx_conf,
1534 struct rte_mempool *mp)
1536 const struct rte_memzone *rz;
1537 struct igb_rx_queue *rxq;
1538 struct ixgbe_hw *hw;
1540 PMD_INIT_FUNC_TRACE();
1541 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1544 * Validate number of receive descriptors.
* It must not exceed the hardware maximum and must be a multiple of IXGBE_ALIGN.
1548 if (((nb_desc * sizeof(union ixgbe_adv_rx_desc)) % IXGBE_ALIGN) != 0 ||
1549 (nb_desc > IXGBE_MAX_RING_DESC) ||
1550 (nb_desc < IXGBE_MIN_RING_DESC)) {
1554 /* Free memory prior to re-allocation if needed... */
1555 if (dev->data->rx_queues[queue_idx] != NULL)
1556 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
1558 /* First allocate the rx queue data structure */
1559 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1564 rxq->nb_rx_desc = nb_desc;
1565 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1566 rxq->queue_id = queue_idx;
1567 rxq->port_id = dev->data->port_id;
1568 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
* Allocate RX ring hardware descriptors. A memzone large enough to
1573 * handle the maximum ring size is allocated in order to allow for
1574 * resizing in later calls to the queue setup function.
1576 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx,
1577 IXGBE_MAX_RING_DESC * sizeof(union ixgbe_adv_rx_desc),
1580 ixgbe_rx_queue_release(rxq);
* Use VFRDT instead of RDT when running as a virtual function.
1586 if (hw->mac.type == ixgbe_mac_82599_vf)
1587 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
1589 rxq->rdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(queue_idx));
1591 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
1592 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
1594 /* Allocate software ring */
1595 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1596 sizeof(struct igb_rx_entry) * nb_desc,
1598 if (rxq->sw_ring == NULL) {
1599 ixgbe_rx_queue_release(rxq);
1602 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1603 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1605 dev->data->rx_queues[queue_idx] = rxq;
1607 ixgbe_reset_rx_queue(rxq);
1613 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
1617 PMD_INIT_FUNC_TRACE();
1619 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1620 struct igb_tx_queue *txq = dev->data->tx_queues[i];
1621 ixgbe_tx_queue_release_mbufs(txq);
1622 ixgbe_reset_tx_queue(txq);
1625 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1626 struct igb_rx_queue *rxq = dev->data->rx_queues[i];
1627 ixgbe_rx_queue_release_mbufs(rxq);
1628 ixgbe_reset_rx_queue(rxq);
1632 /*********************************************************************
1634 * Device RX/TX init functions
1636 **********************************************************************/
1639 * Receive Side Scaling (RSS)
1640 * See section 7.1.2.8 in the following document:
1641 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
1644 * The source and destination IP addresses of the IP header and the source
1645 * and destination ports of TCP/UDP headers, if any, of received packets are
1646 * hashed against a configurable random key to compute a 32-bit RSS hash result.
1647 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1648 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1649 * RSS output index which is used as the RX queue index where to store the
1651 * The following output is supplied in the RX write-back descriptor:
1652 * - 32-bit result of the Microsoft RSS hash function,
1653 * - 4-bit RSS type field.
1657 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
1658 * Used as the default key.
1660 static uint8_t rss_intel_key[40] = {
1661 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1662 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1663 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1664 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1665 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1669 ixgbe_rss_disable(struct rte_eth_dev *dev)
1671 struct ixgbe_hw *hw;
1674 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1675 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
1676 mrqc &= ~IXGBE_MRQC_RSSEN;
1677 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
1681 ixgbe_rss_configure(struct rte_eth_dev *dev)
1683 struct ixgbe_hw *hw;
1692 PMD_INIT_FUNC_TRACE();
1693 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1695 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1696 if (rss_hf == 0) { /* Disable RSS */
1697 ixgbe_rss_disable(dev);
1700 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1701 if (hash_key == NULL)
1702 hash_key = rss_intel_key; /* Default hash key */
1704 /* Fill in RSS hash key */
1705 for (i = 0; i < 10; i++) {
1706 rss_key = hash_key[(i * 4)];
1707 rss_key |= hash_key[(i * 4) + 1] << 8;
1708 rss_key |= hash_key[(i * 4) + 2] << 16;
1709 rss_key |= hash_key[(i * 4) + 3] << 24;
1710 IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, rss_key);
1713 /* Fill in redirection table */
1715 for (i = 0, j = 0; i < 128; i++, j++) {
1716 if (j == dev->data->nb_rx_queues) j = 0;
1717 reta = (reta << 8) | j;
1719 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), rte_bswap32(reta));
1722 /* Set configured hashing functions in MRQC register */
1723 mrqc = IXGBE_MRQC_RSSEN; /* RSS enable */
1724 if (rss_hf & ETH_RSS_IPV4)
1725 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
1726 if (rss_hf & ETH_RSS_IPV4_TCP)
1727 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
1728 if (rss_hf & ETH_RSS_IPV6)
1729 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
1730 if (rss_hf & ETH_RSS_IPV6_EX)
1731 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
1732 if (rss_hf & ETH_RSS_IPV6_TCP)
1733 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
1734 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1735 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
1736 if (rss_hf & ETH_RSS_IPV4_UDP)
1737 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
1738 if (rss_hf & ETH_RSS_IPV6_UDP)
1739 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
1740 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1741 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
1742 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
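/*
 * Note on the redirection table fill above: RETA entries are packed four per
 * 32-bit register and the queue index simply wraps around nb_rx_queues, so
 * with e.g. 4 RX queues the 128-entry table becomes 0,1,2,3,0,1,2,3,...,
 * spreading the RSS hash space evenly across the configured queues.
 */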
1745 #define NUM_VFTA_REGISTERS 128
1746 #define NIC_RX_BUFFER_SIZE 0x200
1749 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
1751 struct rte_eth_vmdq_dcb_conf *cfg;
1752 struct ixgbe_hw *hw;
1753 enum rte_eth_nb_pools num_pools;
1754 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
1756 uint8_t nb_tcs; /* number of traffic classes */
1759 PMD_INIT_FUNC_TRACE();
1760 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1761 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
1762 num_pools = cfg->nb_queue_pools;
1763 /* Check we have a valid number of pools */
1764 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
1765 ixgbe_rss_disable(dev);
1768 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
1769 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
1773 * split rx buffer up into sections, each for 1 traffic class
1775 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
1776 for (i = 0 ; i < nb_tcs; i++) {
1777 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
1778 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
1779 /* clear 10 bits. */
1780 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
1781 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
1783 /* zero alloc all unused TCs */
1784 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
1785 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
1786 rxpbsize &= (~( 0x3FF << IXGBE_RXPBSIZE_SHIFT ));
1787 /* clear 10 bits. */
1788 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
1791 /* MRQC: enable vmdq and dcb */
1792 mrqc = ((num_pools == ETH_16_POOLS) ? \
1793 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN );
1794 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
1796 /* PFVTCTL: turn on virtualisation and set the default pool */
1797 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
1798 if (cfg->enable_default_pool) {
1799 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
1801 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
1803 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
1805 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
1807 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
1809 * mapping is done with 3 bits per priority,
1810 * so shift by i*3 each time
1812 queue_mapping |= ((cfg->dcb_queue[i] & 0x07) << (i * 3));
1814 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
1816 /* RTRPCS: DCB related */
1817 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
1819 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
1820 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
1821 vlanctrl |= IXGBE_VLNCTRL_VFE ; /* enable vlan filters */
1822 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
1824 /* VFTA - enable all vlan filters */
1825 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
1826 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
1829 /* VFRE: pool enabling for receive - 16 or 32 */
1830 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), \
1831 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
1834 * MPSAR - allow pools to read specific mac addresses
1835 * In this case, all pools should be able to read from mac addr 0
1837 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
1838 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
1840 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
1841 for (i = 0; i < cfg->nb_pool_maps; i++) {
1842 /* set vlan id in VF register and set the valid bit */
1843 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN | \
1844 (cfg->pool_map[i].vlan_id & 0xFFF)));
1846 * Put the allowed pools in VFB reg. As we only have 16 or 32
1847 * pools, we only need to use the first half of the register
1850 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
1855 ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
1857 struct igb_rx_entry *rxe = rxq->sw_ring;
1861 /* Initialize software ring entries */
1862 for (i = 0; i < rxq->nb_rx_desc; i++) {
1863 volatile union ixgbe_adv_rx_desc *rxd;
1864 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
1866 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u\n",
1867 (unsigned) rxq->queue_id);
1871 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
1872 rxd = &rxq->rx_ring[i];
1873 rxd->read.hdr_addr = dma_addr;
1874 rxd->read.pkt_addr = dma_addr;
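/*
 * After the loop above every RX descriptor owns one freshly allocated mbuf
 * and both its packet and header buffer addresses point into that mbuf's data
 * area, which is the state the receive functions expect when they start
 * polling the ring.
 */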
1882 * Initializes Receive Unit.
1885 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
1887 struct ixgbe_hw *hw;
1888 struct igb_rx_queue *rxq;
1889 struct rte_pktmbuf_pool_private *mbp_priv;
1902 PMD_INIT_FUNC_TRACE();
1903 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1906 * Make sure receives are disabled while setting
1907 * up the RX context (registers, descriptor rings, etc.).
1909 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1910 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
/* Enable receipt of broadcast frames */
1913 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
1914 fctrl |= IXGBE_FCTRL_BAM;
1915 fctrl |= IXGBE_FCTRL_DPF;
1916 fctrl |= IXGBE_FCTRL_PMCF;
1917 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
1920 * Configure CRC stripping, if any.
1922 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
1923 if (dev->data->dev_conf.rxmode.hw_strip_crc)
1924 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
1926 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
1929 * Configure jumbo frame support, if any.
1931 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
1932 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
1933 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
1934 maxfrs &= 0x0000FFFF;
1935 maxfrs |= (dev->data->dev_conf.rxmode.max_rx_pkt_len << 16);
1936 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
1938 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
1940 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
1942 /* Setup RX queues */
1943 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1944 rxq = dev->data->rx_queues[i];
1946 /* Allocate buffers for descriptor rings */
1947 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
1952 * Reset crc_len in case it was changed after queue setup by a
1953 * call to configure.
1955 rxq->crc_len = (uint8_t)
1956 ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1959 /* Setup the Base and Length of the Rx Descriptor Rings */
1960 bus_addr = rxq->rx_ring_phys_addr;
1961 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
1962 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
1963 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i),
1964 (uint32_t)(bus_addr >> 32));
1965 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
1966 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
1967 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
1968 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
1970 /* Configure the SRRCTL register */
1971 #ifdef RTE_HEADER_SPLIT_ENABLE
1973 * Configure Header Split
1975 if (dev->data->dev_conf.rxmode.header_split) {
1976 if (hw->mac.type == ixgbe_mac_82599EB) {
1977 /* Must setup the PSRTYPE register */
1979 psrtype = IXGBE_PSRTYPE_TCPHDR |
1980 IXGBE_PSRTYPE_UDPHDR |
1981 IXGBE_PSRTYPE_IPV4HDR |
1982 IXGBE_PSRTYPE_IPV6HDR;
1983 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), psrtype);
1985 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
1986 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
1987 IXGBE_SRRCTL_BSIZEHDR_MASK);
srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
1991 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
1994 * Configure the RX buffer size in the BSIZEPACKET field of
1995 * the SRRCTL register of the queue.
1996 * The value is in 1 KB resolution. Valid values can be from
1999 mbp_priv = (struct rte_pktmbuf_pool_private *)
2000 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
2001 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
2002 RTE_PKTMBUF_HEADROOM);
2003 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
2004 IXGBE_SRRCTL_BSIZEPKT_MASK);
2005 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2007 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
2008 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
2009 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
2010 dev->data->scattered_rx = 1;
2011 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
2016 * Configure RSS if device configured with multiple RX queues.
2018 if (hw->mac.type == ixgbe_mac_82599EB) {
2019 if (dev->data->nb_rx_queues > 1)
2020 switch (dev->data->dev_conf.rxmode.mq_mode) {
2022 ixgbe_rss_configure(dev);
2026 ixgbe_vmdq_dcb_configure(dev);
2029 default: ixgbe_rss_disable(dev);
2032 ixgbe_rss_disable(dev);
2036 * Setup the Checksum Register.
2037 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
* Enable IP/L4 checksum computation by hardware if requested to do so.
2040 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2041 rxcsum |= IXGBE_RXCSUM_PCSD;
2042 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2043 rxcsum |= IXGBE_RXCSUM_IPPCSE;
2045 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
2047 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2049 if (hw->mac.type == ixgbe_mac_82599EB) {
2050 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2051 if (dev->data->dev_conf.rxmode.hw_strip_crc)
2052 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2054 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
2055 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2056 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
/*
 * Initializes Transmit Unit.
 */
void
ixgbe_dev_tx_init(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    struct igb_tx_queue *txq;
    uint64_t bus_addr;
    uint32_t hlreg0;
    uint32_t txctrl;
    uint32_t rttdcs;
    uint16_t i;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    /* Enable TX CRC (checksum offload requirement) */
    hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
    hlreg0 |= IXGBE_HLREG0_TXCRCEN;
    IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);

    /* Setup the Base and Length of the Tx Descriptor Rings */
    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txq = dev->data->tx_queues[i];

        bus_addr = txq->tx_ring_phys_addr;
        IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
        IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i),
                (uint32_t)(bus_addr >> 32));
        IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
        /* Setup the HW Tx Head and TX Tail descriptor pointers */
        IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
        IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

        /*
         * Disable Tx Head Writeback RO bit, since this hoses
         * bookkeeping if things aren't delivered in order.
         */
        switch (hw->mac.type) {
        case ixgbe_mac_82598EB:
            txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
            txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
            IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
            break;

        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
        default:
            txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
            txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
            IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
            break;
        }
    }

    if (hw->mac.type != ixgbe_mac_82598EB) {
        /* disable arbiter before setting MTQC */
        rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
        rttdcs |= IXGBE_RTTDCS_ARBDIS;
        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

        IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);

        /* re-enable arbiter */
        rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
    }
}
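
/*
 * Illustrative sketch, not part of the upstream driver: the 64-bit descriptor
 * ring bus address is split into two 32-bit halves before being written to
 * TDBAL/TDBAH (and RDBAL/RDBAH above). Hypothetical helper, shown only to
 * make the masking and shifting explicit.
 */
static inline void
example_split_ring_addr(uint64_t bus_addr, uint32_t *low, uint32_t *high)
{
    *low  = (uint32_t)(bus_addr & 0x00000000ffffffffULL); /* TDBAL/RDBAL */
    *high = (uint32_t)(bus_addr >> 32);                   /* TDBAH/RDBAH */
}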
/*
 * Start Transmit and Receive Units.
 */
void
ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    struct igb_tx_queue *txq;
    struct igb_rx_queue *rxq;
    uint32_t txdctl;
    uint32_t dmatxctl;
    uint32_t rxdctl;
    uint32_t rxctrl;
    uint16_t i;
    int poll_ms;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txq = dev->data->tx_queues[i];
        /* Setup Transmit Threshold Registers */
        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
        txdctl |= txq->pthresh & 0x7F;
        txdctl |= ((txq->hthresh & 0x7F) << 8);
        txdctl |= ((txq->wthresh & 0x7F) << 16);
        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
    }

    if (hw->mac.type != ixgbe_mac_82598EB) {
        dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
        dmatxctl |= IXGBE_DMATXCTL_TE;
        IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
    }

    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
        txdctl |= IXGBE_TXDCTL_ENABLE;
        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);

        /* Wait until TX Enable ready */
        if (hw->mac.type == ixgbe_mac_82599EB) {
            poll_ms = 10;
            do {
                rte_delay_ms(1);
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
            } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
            if (!poll_ms)
                PMD_INIT_LOG(ERR, "Could not enable "
                         "Tx Queue %d\n", i);
        }
    }

    for (i = 0; i < dev->data->nb_rx_queues; i++) {
        rxq = dev->data->rx_queues[i];
        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
        rxdctl |= IXGBE_RXDCTL_ENABLE;
        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);

        /* Wait until RX Enable ready */
        poll_ms = 10;
        do {
            rte_delay_ms(1);
            rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
        } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
        if (!poll_ms)
            PMD_INIT_LOG(ERR, "Could not enable "
                     "Rx Queue %d\n", i);
        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), rxq->nb_rx_desc - 1);
    }

    /* Enable Receive engine */
    rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
    if (hw->mac.type == ixgbe_mac_82598EB)
        rxctrl |= IXGBE_RXCTRL_DMBYPS;
    rxctrl |= IXGBE_RXCTRL_RXEN;
    hw->mac.ops.enable_rx_dma(hw, rxctrl);
}
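
/*
 * Illustrative sketch, not part of the upstream driver: the poll-until-set
 * pattern used above while waiting for the TXDCTL/RXDCTL enable bits.
 * "read_reg" is a hypothetical callback, not a driver API; rte_delay_ms()
 * comes from <rte_cycles.h>, which this file already includes.
 */
static inline int
example_poll_bit(uint32_t (*read_reg)(void *ctx), void *ctx,
         uint32_t bit, int timeout_ms)
{
    uint32_t reg;

    do {
        rte_delay_ms(1);
        reg = read_reg(ctx);
    } while (--timeout_ms && !(reg & bit));

    /* 0 on success, -1 if the bit never came up within the timeout. */
    return (reg & bit) ? 0 : -1;
}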
/*
 * [VF] Initializes Receive Unit.
 */
int
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    struct igb_rx_queue *rxq;
    struct rte_pktmbuf_pool_private *mbp_priv;
    uint64_t bus_addr;
    uint32_t srrctl;
#ifdef RTE_HEADER_SPLIT_ENABLE
    uint32_t psrtype;
#endif
    uint16_t buf_size;
    uint16_t i;
    int ret;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    /* Setup RX queues */
    dev->rx_pkt_burst = ixgbe_recv_pkts;
    for (i = 0; i < dev->data->nb_rx_queues; i++) {
        rxq = dev->data->rx_queues[i];

        /* Allocate buffers for descriptor rings */
        ret = ixgbe_alloc_rx_queue_mbufs(rxq);
        if (ret)
            return ret;

        /* Setup the Base and Length of the Rx Descriptor Rings */
        bus_addr = rxq->rx_ring_phys_addr;
        IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
        IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
                (uint32_t)(bus_addr >> 32));
        IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
        IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
        IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

        /* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
        /*
         * Configure Header Split
         */
        if (dev->data->dev_conf.rxmode.header_split) {
            /* Must setup the PSRTYPE register */
            psrtype = IXGBE_PSRTYPE_TCPHDR |
                IXGBE_PSRTYPE_UDPHDR |
                IXGBE_PSRTYPE_IPV4HDR |
                IXGBE_PSRTYPE_IPV6HDR;
            IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE(i), psrtype);

            srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
                IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
                IXGBE_SRRCTL_BSIZEHDR_MASK);
            srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
        } else
#endif
            srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

        /*
         * Configure the RX buffer size in the BSIZEPACKET field of
         * the SRRCTL register of the queue.
         * The value is in 1 KB resolution. Valid values can be from
         * 1 KB to 16 KB.
         */
        mbp_priv = (struct rte_pktmbuf_pool_private *)
            ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
        buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                       RTE_PKTMBUF_HEADROOM);
        srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
               IXGBE_SRRCTL_BSIZEPKT_MASK);

        /*
         * VF modification to write virtual function SRRCTL register
         */
        IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

        buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
        if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
            dev->data->scattered_rx = 1;
            dev->rx_pkt_burst = ixgbe_recv_scattered_pkts;
        }
    }

    return 0;
}
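
/*
 * Illustrative sketch, not part of the upstream driver: the rule used by both
 * the PF and VF RX init paths above to decide whether the scattered receive
 * function must be installed. Hypothetical helper; it simply restates the
 * comparison against the per-buffer size derived from SRRCTL.
 */
static inline int
example_needs_scattered_rx(uint32_t max_rx_pkt_len, uint16_t buf_size)
{
    /* A frame larger than one RX buffer must span several mbufs. */
    return max_rx_pkt_len > buf_size;
}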
/*
 * [VF] Initializes Transmit Unit.
 */
void
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    struct igb_tx_queue *txq;
    uint64_t bus_addr;
    uint32_t txctrl;
    uint16_t i;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    /* Setup the Base and Length of the Tx Descriptor Rings */
    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txq = dev->data->tx_queues[i];
        bus_addr = txq->tx_ring_phys_addr;
        IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
        IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
                (uint32_t)(bus_addr >> 32));
        IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
        /* Setup the HW Tx Head and TX Tail descriptor pointers */
        IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
        IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

        /*
         * Disable Tx Head Writeback RO bit, since this hoses
         * bookkeeping if things aren't delivered in order.
         */
        txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i));
        txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN;
        IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl);
    }
}
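
/*
 * Illustrative sketch, not part of the upstream driver: the ring length
 * written to TDLEN/VFTDLEN above is the descriptor count times the size of
 * one advanced descriptor (16 bytes), so a 512-entry ring programs 8192.
 * Hypothetical helper restating that arithmetic.
 */
static inline uint32_t
example_tx_ring_len_bytes(uint16_t nb_tx_desc)
{
    return (uint32_t)(nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
}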
/*
 * [VF] Start Transmit and Receive Units.
 */
void
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
    struct ixgbe_hw *hw;
    struct igb_tx_queue *txq;
    struct igb_rx_queue *rxq;
    uint32_t txdctl;
    uint32_t rxdctl;
    uint16_t i;
    int poll_ms;

    PMD_INIT_FUNC_TRACE();
    hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txq = dev->data->tx_queues[i];
        /* Setup Transmit Threshold Registers */
        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
        txdctl |= txq->pthresh & 0x7F;
        txdctl |= ((txq->hthresh & 0x7F) << 8);
        txdctl |= ((txq->wthresh & 0x7F) << 16);
        IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
    }

    for (i = 0; i < dev->data->nb_tx_queues; i++) {
        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
        txdctl |= IXGBE_TXDCTL_ENABLE;
        IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

        /* Wait until TX Enable ready */
        poll_ms = 10;
        do {
            rte_delay_ms(1);
            txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
        } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
        if (!poll_ms)
            PMD_INIT_LOG(ERR, "Could not enable "
                     "Tx Queue %d\n", i);
    }

    for (i = 0; i < dev->data->nb_rx_queues; i++) {
        rxq = dev->data->rx_queues[i];

        rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
        rxdctl |= IXGBE_RXDCTL_ENABLE;
        IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

        /* Wait until RX Enable ready */
        poll_ms = 10;
        do {
            rte_delay_ms(1);
            rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
        } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
        if (!poll_ms)
            PMD_INIT_LOG(ERR, "Could not enable "
                     "Rx Queue %d\n", i);

        IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
    }
}
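
/*
 * Illustrative sketch, not part of the upstream driver: how the prefetch,
 * host, and write-back thresholds are packed into TXDCTL/VFTXDCTL by the
 * start routines above. Hypothetical helper; each field is 7 bits wide.
 */
static inline uint32_t
example_pack_txdctl_thresholds(uint32_t txdctl, uint8_t pthresh,
                   uint8_t hthresh, uint8_t wthresh)
{
    txdctl |= pthresh & 0x7F;                     /* PTHRESH: bits 6:0   */
    txdctl |= ((uint32_t)(hthresh & 0x7F)) << 8;  /* HTHRESH: bits 14:8  */
    txdctl |= ((uint32_t)(wthresh & 0x7F)) << 16; /* WTHRESH: bits 22:16 */
    return txdctl;
}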