4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_interrupts.h>
47 #include <rte_byteorder.h>
48 #include <rte_common.h>
50 #include <rte_debug.h>
52 #include <rte_memory.h>
53 #include <rte_memcpy.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_tailq.h>
58 #include <rte_per_lcore.h>
59 #include <rte_lcore.h>
60 #include <rte_atomic.h>
61 #include <rte_branch_prediction.h>
63 #include <rte_mempool.h>
64 #include <rte_malloc.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_prefetch.h>
72 #include <rte_string_fns.h>
74 #include "e1000_logs.h"
75 #include "igb/e1000_api.h"
76 #include "e1000_ethdev.h"
/*
 * Allocate a raw packet mbuf from the RX mempool, used to replenish an RX
 * ring descriptor after its previous mbuf has been handed to the caller.
 * The raw allocator skips the normal per-field reset for speed; the sanity
 * check is presumably a debug-build no-op -- confirm against rte_mbuf.h.
 */
78 static inline struct rte_mbuf *
79 rte_rxmbuf_alloc(struct rte_mempool *mp)
83 m = __rte_mbuf_raw_alloc(mp);
/* Consistency check on the freshly allocated mbuf (type PKT, not a header). */
84 __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
/*
 * Physical (DMA) address of the data currently stored in an mbuf:
 * physical base of the buffer plus the offset of pkt.data within it.
 * Used when programming TX data descriptors for arbitrary segment offsets.
 */
88 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
89 (uint64_t) ((mb)->buf_physaddr + \
90 (uint64_t) ((char *)((mb)->pkt.data) - \
91 (char *)(mb)->buf_addr))
/*
 * DMA address of the default data start (buffer base + configured headroom);
 * used when programming RX descriptors, where data always begins at the
 * headroom offset.
 */
93 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
94 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
97 * Structure associated with each descriptor of the RX ring of a RX queue.
100 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
104 * Structure associated with each descriptor of the TX ring of a TX queue.
106 struct igb_tx_entry {
107 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
/* next_id/last_id link the software ring into a circular list so the TX
 * path can walk from any entry to the last descriptor of its packet. */
108 uint16_t next_id; /**< Index of next descriptor in ring. */
109 uint16_t last_id; /**< Index of last scattered descriptor. */
113 * Structure associated with each RX queue.
115 struct igb_rx_queue {
116 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
117 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
118 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
119 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
120 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
/* pkt_first_seg/pkt_last_seg carry scattered-RX state across bursts so a
 * multi-segment packet can span two calls to the receive function. */
121 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
122 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
123 uint16_t nb_rx_desc; /**< number of RX descriptors. */
124 uint16_t rx_tail; /**< current value of RDT register. */
/* Descriptors processed but not yet returned to the NIC; the RDT doorbell
 * is only written once nb_rx_hold exceeds rx_free_thresh. */
125 uint16_t nb_rx_hold; /**< number of held free RX desc. */
126 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
127 uint16_t queue_id; /**< RX queue index. */
128 uint8_t port_id; /**< Device port identifier. */
129 uint8_t pthresh; /**< Prefetch threshold register. */
130 uint8_t hthresh; /**< Host threshold register. */
131 uint8_t wthresh; /**< Write-back threshold register. */
132 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
133 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
137 * Hardware context number
/* The igb hardware caches two TX offload contexts per queue; these indices
 * select which slot a context descriptor programs. */
139 enum igb_advctx_num {
140 IGB_CTX_0 = 0, /**< CTX0 */
141 IGB_CTX_1 = 1, /**< CTX1 */
142 IGB_CTX_NUM = 2, /**< CTX NUM */
146 * Structure to check if new context need be built
/* Software mirror of one hardware TX context slot; compared against each
 * packet's offload flags/lengths to decide whether a new context
 * descriptor must be emitted (see what_advctx_update()). */
148 struct igb_advctx_info {
149 uint16_t flags; /**< ol_flags related to context build. */
150 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
151 uint32_t vlan_macip_lens; /**< vlan, mac.ip length. */
155 * Structure associated with each TX queue.
157 struct igb_tx_queue {
158 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
159 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
160 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
161 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
162 uint32_t txd_type; /**< Device-specific TXD type */
163 uint16_t nb_tx_desc; /**< number of TX descriptors. */
164 uint16_t tx_tail; /**< Current value of TDT register. */
165 uint16_t tx_head; /**< Index of first used TX descriptor. */
166 uint16_t queue_id; /**< TX queue index. */
167 uint8_t port_id; /**< Device port identifier. */
168 uint8_t pthresh; /**< Prefetch threshold register. */
169 uint8_t hthresh; /**< Host threshold register. */
170 uint8_t wthresh; /**< Write-back threshold register. */
/* ctx_curr indexes ctx_cache[]; ctx_start offsets it into the global HW
 * context space (non-zero on 82575, which shares contexts across queues). */
171 uint32_t ctx_curr; /**< Current used hardware descriptor. */
172 uint32_t ctx_start;/**< Start context position for transmit queue. */
173 struct igb_advctx_info ctx_cache[IGB_CTX_NUM]; /**< Hardware context history.*/
/* Compile-time switches: when the corresponding macro is defined the
 * prefetch helpers emit real prefetch instructions, otherwise they expand
 * to a no-op so the hot paths need no #ifdefs at the call sites. */
177 #define RTE_PMD_USE_PREFETCH
180 #ifdef RTE_PMD_USE_PREFETCH
181 #define rte_igb_prefetch(p) rte_prefetch0(p)
183 #define rte_igb_prefetch(p) do {} while(0)
186 #ifdef RTE_PMD_PACKET_PREFETCH
187 #define rte_packet_prefetch(p) rte_prefetch1(p)
189 #define rte_packet_prefetch(p) do {} while(0)
192 /*********************************************************************
196 **********************************************************************/
199 * Advanced context descriptor are almost same between igb/ixgbe
200 * This is a separate function, looking for optimization opportunity here
201 * Rework required to go with the pre-defined values.
/*
 * Program a TX advanced context descriptor for the requested VLAN/checksum
 * offloads and record its parameters in txq->ctx_cache[] so subsequent
 * packets with identical offload settings can reuse the hardware context.
 */
205 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
206 volatile struct e1000_adv_tx_context_desc *ctx_txd,
207 uint16_t ol_flags, uint32_t vlan_macip_lens)
209 uint32_t type_tucmd_mlhl;
210 uint32_t mss_l4len_idx;
211 uint32_t ctx_idx, ctx_curr;
/* ctx_idx is the global HW context slot: per-queue slot plus the queue's
 * base offset (non-zero only on 82575 -- see igb_reset_tx_queue()). */
214 ctx_curr = txq->ctx_curr;
215 ctx_idx = ctx_curr + txq->ctx_start;
/* VLAN insertion requested: the VLAN TCI field must match on cache reuse. */
220 if (ol_flags & PKT_TX_VLAN_PKT) {
221 cmp_mask |= TX_VLAN_CMP_MASK;
/* IPv4 header checksum offload: MAC length participates in the compare. */
224 if (ol_flags & PKT_TX_IP_CKSUM) {
225 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
226 cmp_mask |= TX_MAC_LEN_CMP_MASK;
229 /* Specify which HW CTX to upload. */
230 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
/* L4 checksum type selects the TUCMD encoding and the fixed L4 header
 * length written into MSS_L4LEN. */
231 switch (ol_flags & PKT_TX_L4_MASK) {
232 case PKT_TX_UDP_CKSUM:
233 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
234 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
235 mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
236 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
238 case PKT_TX_TCP_CKSUM:
239 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
240 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
241 mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
242 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
244 case PKT_TX_SCTP_CKSUM:
245 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
246 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
247 mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
248 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
/* No/unknown L4 checksum: reserved L4 type, context descriptor only. */
251 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
252 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
/* Remember what this context encodes; only the masked-in portion of
 * vlan_macip_lens is relevant for future match checks. */
256 txq->ctx_cache[ctx_curr].flags = ol_flags;
257 txq->ctx_cache[ctx_curr].cmp_mask = cmp_mask;
258 txq->ctx_cache[ctx_curr].vlan_macip_lens = vlan_macip_lens & cmp_mask;
/* Write the context descriptor itself (little-endian as required by HW). */
260 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
261 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
262 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
263 ctx_txd->seqnum_seed = 0;
267 * Check which hardware context can be used. Use the existing match
268 * or create a new context descriptor.
/*
 * Returns the index of a cached HW context matching (flags, masked
 * vlan_macip_lens), or IGB_CTX_NUM to signal the caller that no cached
 * context matches and a fresh context descriptor must be built.
 */
270 static inline uint32_t
271 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
272 uint32_t vlan_macip_lens)
274 /* If match with the current context */
275 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
276 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
277 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
278 return txq->ctx_curr;
281 /* If match with the second context */
/* NOTE(review): as shown, this second probe reads the same ctx_curr slot
 * as the first. The toggle of txq->ctx_curr (e.g. ctx_curr ^= 1) that
 * should precede it is not visible in this excerpt -- confirm it exists,
 * otherwise the second cache slot is never consulted. */
283 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
284 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens ==
285 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
286 return txq->ctx_curr;
289 /* Mismatch, use the previous context */
290 return (IGB_CTX_NUM);
/*
 * Translate checksum-offload ol_flags into the OLINFO_STATUS POPTS bits:
 * TXSM when any L4 checksum is requested, IXSM for IPv4 header checksum.
 * Branch-free: boolean tests index two 2-entry lookup tables.
 */
293 static inline uint32_t
294 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
296 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
297 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
300 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
301 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
/*
 * Translate the VLAN ol_flag into the CMD_TYPE_LEN VLE (VLAN insert) bit,
 * using a branch-free 2-entry table lookup.
 */
305 static inline uint32_t
306 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
308 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
309 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
/*
 * Burst-transmit up to nb_pkts packets on an IGB TX queue using advanced
 * descriptors. For each packet: optionally emit a context descriptor for
 * VLAN/checksum offloads (reusing a cached HW context when possible), then
 * one data descriptor per mbuf segment; the TDT doorbell is written once
 * at the end of the burst. Returns the number of packets actually queued.
 */
313 eth_igb_xmit_pkts(struct igb_tx_queue *txq, struct rte_mbuf **tx_pkts,
316 struct igb_tx_entry *sw_ring;
317 struct igb_tx_entry *txe, *txn;
318 volatile union e1000_adv_tx_desc *txr;
319 volatile union e1000_adv_tx_desc *txd;
320 struct rte_mbuf *tx_pkt;
321 struct rte_mbuf *m_seg;
322 uint64_t buf_dma_addr;
323 uint32_t olinfo_status;
324 uint32_t cmd_type_len;
335 uint32_t vlan_macip_lens;
337 sw_ring = txq->sw_ring;
339 tx_id = txq->tx_tail;
340 txe = &sw_ring[tx_id];
342 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
344 pkt_len = tx_pkt->pkt.pkt_len;
/* Prefetch the mbuf we are about to free/replace in the SW ring. */
346 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
349 * The number of descriptors that must be allocated for a
350 * packet is the number of segments of that packet, plus 1
351 * Context Descriptor for the VLAN Tag Identifier, if any.
352 * Determine the last TX descriptor to allocate in the TX ring
353 * for the packet, starting from the current position (tx_id)
356 tx_last = (uint16_t) (tx_id + tx_pkt->pkt.nb_segs - 1);
/* Pack VLAN TCI (high 16 bits), L2 and L3 lengths into the layout the
 * context descriptor expects. */
358 ol_flags = tx_pkt->ol_flags;
359 vlan_macip_lens = (tx_pkt->pkt.vlan_tci << 16) | (tx_pkt->pkt.l2_len << E1000_ADVTXD_MACLEN_SHIFT) | tx_pkt->pkt.l3_len;
360 tx_ol_req = (ol_flags & PKT_TX_OFFLOAD_MASK);
362 /* If a Context Descriptor need be built . */
364 ctx = what_advctx_update(txq, tx_ol_req,vlan_macip_lens);
365 /* Only allocate context descriptor if required*/
366 new_ctx = (ctx == IGB_CTX_NUM);
368 tx_last = (uint16_t) (tx_last + new_ctx);
/* Wrap tx_last around the end of the ring. */
370 if (tx_last >= txq->nb_tx_desc)
371 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
373 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
374 " tx_first=%u tx_last=%u\n",
375 (unsigned) txq->port_id,
376 (unsigned) txq->queue_id,
382 * Check if there are enough free descriptors in the TX ring
383 * to transmit the next packet.
384 * This operation is based on the two following rules:
386 * 1- Only check that the last needed TX descriptor can be
387 * allocated (by construction, if that descriptor is free,
388 * all intermediate ones are also free).
390 * For this purpose, the index of the last TX descriptor
391 * used for a packet (the "last descriptor" of a packet)
392 * is recorded in the TX entries (the last one included)
393 * that are associated with all TX descriptors allocated
396 * 2- Avoid to allocate the last free TX descriptor of the
397 * ring, in order to never set the TDT register with the
398 * same value stored in parallel by the NIC in the TDH
399 * register, which makes the TX engine of the NIC enter
400 * in a deadlock situation.
402 * By extension, avoid to allocate a free descriptor that
403 * belongs to the last set of free descriptors allocated
404 * to the same packet previously transmitted.
408 * The "last descriptor" of the previously sent packet, if any,
409 * which used the last descriptor to allocate.
411 tx_end = sw_ring[tx_last].last_id;
414 * The next descriptor following that "last descriptor" in the
417 tx_end = sw_ring[tx_end].next_id;
420 * The "last descriptor" associated with that next descriptor.
422 tx_end = sw_ring[tx_end].last_id;
425 * Check that this descriptor is free.
/* DD (Descriptor Done) clear means the NIC has not yet written the
 * descriptor back: the ring is still occupied at tx_end. */
427 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
434 * Set common flags of all TX Data Descriptors.
436 * The following bits must be set in all Data Descriptors:
437 * - E1000_ADVTXD_DTYP_DATA
438 * - E1000_ADVTXD_DCMD_DEXT
440 * The following bits must be set in the first Data Descriptor
441 * and are ignored in the other ones:
442 * - E1000_ADVTXD_DCMD_IFCS
443 * - E1000_ADVTXD_MAC_1588
444 * - E1000_ADVTXD_DCMD_VLE
446 * The following bits must only be set in the last Data
448 * - E1000_TXD_CMD_EOP
450 * The following bits can be set in any Data Descriptor, but
451 * are only set in the last Data Descriptor:
454 cmd_type_len = txq->txd_type |
455 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
456 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
457 #if defined(RTE_LIBRTE_IEEE1588)
458 if (ol_flags & PKT_TX_IEEE1588_TMST)
459 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
462 /* Setup TX Advanced context descriptor if required */
464 volatile struct e1000_adv_tx_context_desc *
467 ctx_txd = (volatile struct
468 e1000_adv_tx_context_desc *)
471 txn = &sw_ring[txe->next_id];
472 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
/* The slot reused for the context descriptor may still hold a
 * previously transmitted mbuf segment: free it now. */
474 if (txe->mbuf != NULL) {
475 rte_pktmbuf_free_seg(txe->mbuf);
479 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
482 txe->last_id = tx_last;
483 tx_id = txe->next_id;
487 /* Setup the TX Advanced Data Descriptor */
488 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
489 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
490 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
/* Per-segment loop: one data descriptor for each mbuf in the chain. */
495 txn = &sw_ring[txe->next_id];
498 if (txe->mbuf != NULL)
499 rte_pktmbuf_free_seg(txe->mbuf);
503 * Set up transmit descriptor.
505 slen = (uint16_t) m_seg->pkt.data_len;
506 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
507 txd->read.buffer_addr =
508 rte_cpu_to_le_64(buf_dma_addr);
509 txd->read.cmd_type_len =
510 rte_cpu_to_le_32(cmd_type_len | slen);
511 txd->read.olinfo_status =
512 rte_cpu_to_le_32(olinfo_status);
513 txe->last_id = tx_last;
514 tx_id = txe->next_id;
516 m_seg = m_seg->pkt.next;
517 } while (m_seg != NULL);
520 * The last packet data descriptor needs End Of Packet (EOP)
521 * and Report Status (RS).
523 txd->read.cmd_type_len |=
524 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
530 * Set the Transmit Descriptor Tail (TDT).
/* Single doorbell write for the whole burst notifies the NIC. */
532 E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
533 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
534 (unsigned) txq->port_id, (unsigned) txq->queue_id,
535 (unsigned) tx_id, (unsigned) nb_tx);
536 txq->tx_tail = tx_id;
541 /*********************************************************************
545 **********************************************************************/
/*
 * Translate the RX descriptor's HLEN_TYPE_RSS packet-type field into mbuf
 * ol_flags (IPv4/IPv6, with/without extension headers). With IEEE1588
 * enabled, ETQF-matched packets map through a PTP flag table instead;
 * otherwise ETQF matches yield no packet-type flags.
 */
546 static inline uint16_t
547 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
/* Indexed by descriptor packet-type nibble (bits 4-7 of hl_tp_rs). */
551 static uint16_t ip_pkt_types_map[16] = {
552 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
553 PKT_RX_IPV6_HDR, 0, 0, 0,
554 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
555 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
558 #if defined(RTE_LIBRTE_IEEE1588)
559 static uint32_t ip_pkt_etqf_map[8] = {
560 0, 0, 0, PKT_RX_IEEE1588_PTP,
564 pkt_flags = (uint16_t) (hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ?
565 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
566 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
568 pkt_flags = (uint16_t) (hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ? 0 :
569 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
/* Low nibble non-zero indicates a valid RSS hash type for this packet. */
571 return pkt_flags | (uint16_t) (((hl_tp_rs & 0x0F) == 0) ? 0 :
/*
 * Translate the RX descriptor status word into mbuf ol_flags:
 * VLAN-present, and (with IEEE1588 enabled) hardware timestamping.
 */
575 static inline uint16_t
576 rx_desc_status_to_pkt_flags(uint32_t rx_status)
580 /* Check if VLAN present */
581 pkt_flags = (uint16_t) (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
583 #if defined(RTE_LIBRTE_IEEE1588)
584 if (rx_status & E1000_RXD_STAT_TMST)
585 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
/*
 * Translate the RX descriptor checksum-error bits (IPE/L4I) into mbuf
 * bad-checksum ol_flags via a branch-free 4-entry table lookup.
 */
590 static inline uint16_t
591 rx_desc_error_to_pkt_flags(uint32_t rx_status)
594 * Bit 30: IPE, IPv4 checksum error
595 * Bit 29: L4I, L4I integrity error
/* Index = (IPE << 1) | L4I after shifting/masking the status word. */
598 static uint16_t error_to_pkt_flags_map[4] = {
599 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
600 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
602 return error_to_pkt_flags_map[(rx_status >>
603 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
/*
 * Burst-receive up to nb_pkts single-segment packets from an IGB RX queue.
 * For each completed descriptor (DD bit set): replenish the slot with a
 * fresh mbuf, fill in the received mbuf's length/port/offload fields, and
 * store it into rx_pkts[]. The RDT doorbell is only written once enough
 * descriptors have been held back (rx_free_thresh). Returns the number of
 * packets received.
 */
607 eth_igb_recv_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
610 volatile union e1000_adv_rx_desc *rx_ring;
611 volatile union e1000_adv_rx_desc *rxdp;
612 struct igb_rx_entry *sw_ring;
613 struct igb_rx_entry *rxe;
614 struct rte_mbuf *rxm;
615 struct rte_mbuf *nmb;
616 union e1000_adv_rx_desc rxd;
619 uint32_t hlen_type_rss;
628 rx_id = rxq->rx_tail;
629 rx_ring = rxq->rx_ring;
630 sw_ring = rxq->sw_ring;
631 while (nb_rx < nb_pkts) {
633 * The order of operations here is important as the DD status
634 * bit must not be read after any other descriptor fields.
635 * rx_ring and rxdp are pointing to volatile data so the order
636 * of accesses cannot be reordered by the compiler. If they were
637 * not volatile, they could be reordered which could lead to
638 * using invalid descriptor fields when read from rxd.
640 rxdp = &rx_ring[rx_id];
641 staterr = rxdp->wb.upper.status_error;
/* Descriptor not yet written back by the NIC: stop the burst here. */
642 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
649 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
650 * likely to be invalid and to be dropped by the various
651 * validation checks performed by the network stack.
653 * Allocate a new mbuf to replenish the RX ring descriptor.
654 * If the allocation fails:
655 * - arrange for that RX descriptor to be the first one
656 * being parsed the next time the receive function is
657 * invoked [on the same queue].
659 * - Stop parsing the RX ring and return immediately.
661 * This policy do not drop the packet received in the RX
662 * descriptor for which the allocation of a new mbuf failed.
663 * Thus, it allows that packet to be later retrieved if
664 * mbuf have been freed in the mean time.
665 * As a side effect, holding RX descriptors instead of
666 * systematically giving them back to the NIC may lead to
667 * RX ring exhaustion situations.
668 * However, the NIC can gracefully prevent such situations
669 * to happen by sending specific "back-pressure" flow control
670 * frames to its peer(s).
672 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
673 "staterr=0x%x pkt_len=%u\n",
674 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
675 (unsigned) rx_id, (unsigned) staterr,
676 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
678 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
680 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
681 "queue_id=%u\n", (unsigned) rxq->port_id,
682 (unsigned) rxq->queue_id);
/* Account the allocation failure in the per-device statistics. */
683 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
688 rxe = &sw_ring[rx_id];
/* Wrap the ring index. */
690 if (rx_id == rxq->nb_rx_desc)
693 /* Prefetch next mbuf while processing current one. */
694 rte_igb_prefetch(sw_ring[rx_id].mbuf);
697 * When next RX descriptor is on a cache-line boundary,
698 * prefetch the next 4 RX descriptors and the next 8 pointers
701 if ((rx_id & 0x3) == 0) {
702 rte_igb_prefetch(&rx_ring[rx_id]);
703 rte_igb_prefetch(&sw_ring[rx_id]);
/* Hand the slot back to the NIC with the new mbuf's buffer address. */
709 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
710 rxdp->read.hdr_addr = dma_addr;
711 rxdp->read.pkt_addr = dma_addr;
714 * Initialize the returned mbuf.
715 * 1) setup generic mbuf fields:
716 * - number of segments,
719 * - RX port identifier.
720 * 2) integrate hardware offload data, if any:
722 * - IP checksum flag,
723 * - VLAN TCI, if any,
/* Subtract CRC length (crc_len is 0 when HW strips the CRC). */
726 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
728 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
729 rte_packet_prefetch(rxm->pkt.data);
730 rxm->pkt.nb_segs = 1;
731 rxm->pkt.next = NULL;
732 rxm->pkt.pkt_len = pkt_len;
733 rxm->pkt.data_len = pkt_len;
734 rxm->pkt.in_port = rxq->port_id;
736 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
737 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
738 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
739 rxm->pkt.vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
741 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
742 pkt_flags = (pkt_flags |
743 rx_desc_status_to_pkt_flags(staterr));
744 pkt_flags = (pkt_flags |
745 rx_desc_error_to_pkt_flags(staterr));
746 rxm->ol_flags = pkt_flags;
749 * Store the mbuf address into the next entry of the array
750 * of returned packets.
752 rx_pkts[nb_rx++] = rxm;
754 rxq->rx_tail = rx_id;
757 * If the number of free RX descriptors is greater than the RX free
758 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
760 * Update the RDT with the value of the last processed RX descriptor
761 * minus 1, to guarantee that the RDT register is never equal to the
762 * RDH register, which creates a "full" ring situation from the
763 * hardware point of view...
765 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
766 if (nb_hold > rxq->rx_free_thresh) {
767 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
768 "nb_hold=%u nb_rx=%u\n",
769 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
770 (unsigned) rx_id, (unsigned) nb_hold,
772 rx_id = (uint16_t) ((rx_id == 0) ?
773 (rxq->nb_rx_desc - 1) : (rx_id - 1));
774 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
777 rxq->nb_rx_hold = nb_hold;
/*
 * Burst-receive up to nb_pkts packets that may span multiple descriptors
 * (scattered RX). Segments are chained via pkt.next until a descriptor
 * with EOP is seen; partially assembled packets are carried across calls
 * in rxq->pkt_first_seg / pkt_last_seg. CRC handling may trim or free the
 * final segment when the HW does not strip the CRC. Returns the number of
 * complete packets stored in rx_pkts[].
 */
782 eth_igb_recv_scattered_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
785 volatile union e1000_adv_rx_desc *rx_ring;
786 volatile union e1000_adv_rx_desc *rxdp;
787 struct igb_rx_entry *sw_ring;
788 struct igb_rx_entry *rxe;
789 struct rte_mbuf *first_seg;
790 struct rte_mbuf *last_seg;
791 struct rte_mbuf *rxm;
792 struct rte_mbuf *nmb;
793 union e1000_adv_rx_desc rxd;
794 uint64_t dma; /* Physical address of mbuf data buffer */
796 uint32_t hlen_type_rss;
805 rx_id = rxq->rx_tail;
806 rx_ring = rxq->rx_ring;
807 sw_ring = rxq->sw_ring;
810 * Retrieve RX context of current packet, if any.
812 first_seg = rxq->pkt_first_seg;
813 last_seg = rxq->pkt_last_seg;
815 while (nb_rx < nb_pkts) {
818 * The order of operations here is important as the DD status
819 * bit must not be read after any other descriptor fields.
820 * rx_ring and rxdp are pointing to volatile data so the order
821 * of accesses cannot be reordered by the compiler. If they were
822 * not volatile, they could be reordered which could lead to
823 * using invalid descriptor fields when read from rxd.
825 rxdp = &rx_ring[rx_id];
826 staterr = rxdp->wb.upper.status_error;
/* Descriptor not yet written back by the NIC: stop the burst here. */
827 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
834 * Allocate a new mbuf to replenish the RX ring descriptor.
835 * If the allocation fails:
836 * - arrange for that RX descriptor to be the first one
837 * being parsed the next time the receive function is
838 * invoked [on the same queue].
840 * - Stop parsing the RX ring and return immediately.
842 * This policy does not drop the packet received in the RX
843 * descriptor for which the allocation of a new mbuf failed.
844 * Thus, it allows that packet to be later retrieved if
845 * mbuf have been freed in the mean time.
846 * As a side effect, holding RX descriptors instead of
847 * systematically giving them back to the NIC may lead to
848 * RX ring exhaustion situations.
849 * However, the NIC can gracefully prevent such situations
850 * to happen by sending specific "back-pressure" flow control
851 * frames to its peer(s).
853 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
854 "staterr=0x%x data_len=%u\n",
855 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
856 (unsigned) rx_id, (unsigned) staterr,
857 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
859 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
861 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
862 "queue_id=%u\n", (unsigned) rxq->port_id,
863 (unsigned) rxq->queue_id);
/* Account the allocation failure in the per-device statistics. */
864 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
869 rxe = &sw_ring[rx_id];
/* Wrap the ring index. */
871 if (rx_id == rxq->nb_rx_desc)
874 /* Prefetch next mbuf while processing current one. */
875 rte_igb_prefetch(sw_ring[rx_id].mbuf);
878 * When next RX descriptor is on a cache-line boundary,
879 * prefetch the next 4 RX descriptors and the next 8 pointers
882 if ((rx_id & 0x3) == 0) {
883 rte_igb_prefetch(&rx_ring[rx_id]);
884 rte_igb_prefetch(&sw_ring[rx_id]);
888 * Update RX descriptor with the physical address of the new
889 * data buffer of the new allocated mbuf.
893 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
894 rxdp->read.pkt_addr = dma;
895 rxdp->read.hdr_addr = dma;
898 * Set data length & data buffer address of mbuf.
900 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
901 rxm->pkt.data_len = data_len;
902 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
905 * If this is the first buffer of the received packet,
906 * set the pointer to the first mbuf of the packet and
907 * initialize its context.
908 * Otherwise, update the total length and the number of segments
909 * of the current scattered packet, and update the pointer to
910 * the last mbuf of the current packet.
912 if (first_seg == NULL) {
914 first_seg->pkt.pkt_len = data_len;
915 first_seg->pkt.nb_segs = 1;
917 first_seg->pkt.pkt_len += data_len;
918 first_seg->pkt.nb_segs++;
919 last_seg->pkt.next = rxm;
923 * If this is not the last buffer of the received packet,
924 * update the pointer to the last mbuf of the current scattered
925 * packet and continue to parse the RX ring.
927 if (! (staterr & E1000_RXD_STAT_EOP)) {
933 * This is the last buffer of the received packet.
934 * If the CRC is not stripped by the hardware:
935 * - Subtract the CRC length from the total packet length.
936 * - If the last buffer only contains the whole CRC or a part
937 * of it, free the mbuf associated to the last buffer.
938 * If part of the CRC is also contained in the previous
939 * mbuf, subtract the length of that CRC part from the
940 * data length of the previous mbuf.
942 rxm->pkt.next = NULL;
943 if (unlikely(rxq->crc_len > 0)) {
944 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
945 if (data_len <= ETHER_CRC_LEN) {
/* Last segment held only CRC bytes: drop it and trim the
 * CRC remainder from the previous segment. */
946 rte_pktmbuf_free_seg(rxm);
947 first_seg->pkt.nb_segs--;
948 last_seg->pkt.data_len = (uint16_t)
949 (last_seg->pkt.data_len -
950 (ETHER_CRC_LEN - data_len));
951 last_seg->pkt.next = NULL;
954 (uint16_t) (data_len - ETHER_CRC_LEN);
958 * Initialize the first mbuf of the returned packet:
959 * - RX port identifier,
960 * - hardware offload data, if any:
962 * - IP checksum flag,
963 * - VLAN TCI, if any,
966 first_seg->pkt.in_port = rxq->port_id;
967 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
970 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
971 * set in the pkt_flags field.
973 first_seg->pkt.vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
974 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
975 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
976 pkt_flags = (pkt_flags | rx_desc_status_to_pkt_flags(staterr));
977 pkt_flags = (pkt_flags | rx_desc_error_to_pkt_flags(staterr));
978 first_seg->ol_flags = pkt_flags;
980 /* Prefetch data of first segment, if configured to do so. */
981 rte_packet_prefetch(first_seg->pkt.data);
984 * Store the mbuf address into the next entry of the array
985 * of returned packets.
987 rx_pkts[nb_rx++] = first_seg;
990 * Setup receipt context for a new packet.
996 * Record index of the next RX descriptor to probe.
998 rxq->rx_tail = rx_id;
1001 * Save receive context.
/* Persist in-progress segment chain so the next call can resume it. */
1003 rxq->pkt_first_seg = first_seg;
1004 rxq->pkt_last_seg = last_seg;
1007 * If the number of free RX descriptors is greater than the RX free
1008 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1010 * Update the RDT with the value of the last processed RX descriptor
1011 * minus 1, to guarantee that the RDT register is never equal to the
1012 * RDH register, which creates a "full" ring situation from the
1013 * hardware point of view...
1015 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1016 if (nb_hold > rxq->rx_free_thresh) {
1017 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1018 "nb_hold=%u nb_rx=%u\n",
1019 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1020 (unsigned) rx_id, (unsigned) nb_hold,
1022 rx_id = (uint16_t) ((rx_id == 0) ?
1023 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1024 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1027 rxq->nb_rx_hold = nb_hold;
1032 * Rings setup and release.
1034 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
1035 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary.
1036 * This will also optimize cache line size effect.
1037 * H/W supports up to cache line size 128.
1039 #define IGB_ALIGN 128
1042 * Maximum number of Ring Descriptors.
1044 * Since RDLEN/TDLEN should be multiple of 128bytes, the number of ring
1045 * descriptors should meet the following condition:
1046 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1048 #define IGB_MIN_RING_DESC 32
1049 #define IGB_MAX_RING_DESC 4096
/*
 * Reserve (or look up) the DMA-able memzone backing a descriptor ring.
 * The zone name encodes driver, ring name, port and queue, so a repeated
 * queue-setup call finds and reuses the zone reserved previously instead
 * of allocating a second one.
 */
1051 static const struct rte_memzone *
1052 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1053 uint16_t queue_id, uint32_t ring_size, int socket_id)
1055 char z_name[RTE_MEMZONE_NAMESIZE];
1056 const struct rte_memzone *mz;
1058 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1059 dev->driver->pci_drv.name, ring_name,
1060 dev->data->port_id, queue_id);
/* Reuse an existing zone with this name if one was reserved before. */
1061 mz = rte_memzone_lookup(z_name);
/* IGB_ALIGN alignment satisfies the HW's TDBA/RDBA requirements. */
1065 return rte_memzone_reserve_aligned(z_name, (uint64_t)ring_size,
1066 socket_id, 0, IGB_ALIGN);
/*
 * Free every mbuf still referenced by the TX software ring and clear the
 * entries, leaving the sw_ring array itself allocated.
 */
1070 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1074 if (txq->sw_ring != NULL) {
1075 for (i = 0; i < txq->nb_tx_desc; i++) {
1076 if (txq->sw_ring[i].mbuf != NULL) {
1077 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
/* Clear the entry so a later release/reset does not double-free. */
1078 txq->sw_ring[i].mbuf = NULL;
/*
 * Release a TX queue: free all queued mbufs, then the software ring.
 * (The descriptor ring memzone is kept for reuse by a later setup call.)
 */
1085 igb_tx_queue_release(struct igb_tx_queue *txq)
1087 igb_tx_queue_release_mbufs(txq);
1088 rte_free(txq->sw_ring);
/*
 * (Re)size the per-device array of TX queue pointers to nb_queues.
 * First allocation is zeroed; on shrink, queues beyond the new count are
 * released before the array is reallocated; on grow, the new tail slots
 * are zeroed. Updates dev->data->nb_tx_queues on success.
 */
1093 igb_dev_tx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1095 uint16_t i, old_nb_queues = dev->data->nb_tx_queues;
1096 struct igb_tx_queue **txq;
1098 if (dev->data->tx_queues == NULL) {
1099 dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
1100 sizeof(struct igb_tx_queue *) * nb_queues,
1102 if (dev->data->tx_queues == NULL) {
1103 dev->data->nb_tx_queues = 0;
/* Shrinking: release the queues that fall outside the new count. */
1107 if (nb_queues < old_nb_queues)
1108 for (i = nb_queues; i < old_nb_queues; i++)
1109 igb_tx_queue_release(dev->data->tx_queues[i]);
1111 if (nb_queues != old_nb_queues) {
1112 txq = rte_realloc(dev->data->tx_queues,
1113 sizeof(struct igb_tx_queue *) * nb_queues,
1118 dev->data->tx_queues = txq;
/* Growing: zero the newly added pointer slots. */
1119 if (nb_queues > old_nb_queues)
1120 memset(&(txq[old_nb_queues]), 0,
1121 sizeof(struct igb_tx_queue *) *
1122 (nb_queues - old_nb_queues));
1125 dev->data->nb_tx_queues = nb_queues;
/*
 * Reset TX queue software state; in particular, clear the cached HW
 * context entries so stale offload parameters are never matched.
 */
1131 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1136 memset((void*)&txq->ctx_cache, 0,
1137 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
/*
 * Reinitialize a TX queue to its post-setup state: zero the HW descriptor
 * ring, pre-mark descriptors as done (DD), rebuild the circular next_id
 * links of the software ring, select the device-specific descriptor type,
 * and reset the cached-context state.
 */
1141 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1143 struct igb_tx_entry *txe = txq->sw_ring;
1146 struct e1000_hw *hw;
1148 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1149 size = sizeof(union e1000_adv_tx_desc) * txq->nb_tx_desc;
1150 /* Zero out HW ring memory */
/* Byte-wise volatile stores: memset cannot be used on volatile memory. */
1151 for (i = 0; i < size; i++) {
1152 ((volatile char *)txq->tx_ring)[i] = 0;
1155 /* Initialize ring entries */
1156 prev = txq->nb_tx_desc - 1;
1157 for (i = 0; i < txq->nb_tx_desc; i++) {
1158 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
/* Pre-set DD so the free-descriptor check passes on first use. */
1160 txd->wb.status = E1000_TXD_STAT_DD;
1163 txe[prev].next_id = i;
1167 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1168 /* 82575 specific, each tx queue will use 2 hw contexts */
1169 if (hw->mac.type == e1000_82575)
1170 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1172 igb_reset_tx_queue_stat(txq);
/*
 * ethdev tx_queue_setup callback: validate nb_desc, allocate the queue
 * structure, reserve a DMA memzone sized for the maximum ring so later
 * re-setups can reuse it, allocate the software ring, then reset the
 * queue and publish it in dev->data->tx_queues[queue_idx].
 */
1176 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1179 unsigned int socket_id,
1180 const struct rte_eth_txconf *tx_conf)
1182 const struct rte_memzone *tz;
1183 struct igb_tx_queue *txq;
1184 struct e1000_hw *hw;
1187 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1190 * Validate number of transmit descriptors.
1191 * It must not exceed hardware maximum, and must be multiple
1194 if (((nb_desc * sizeof(union e1000_adv_tx_desc)) % IGB_ALIGN) != 0 ||
1195 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1200 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1203 if (tx_conf->tx_free_thresh != 0)
1204 RTE_LOG(WARNING, PMD,
1205 "The tx_free_thresh parameter is not "
1206 "used for the 1G driver.")
1207 if (tx_conf->tx_rs_thresh != 0)
1208 RTE_LOG(WARNING, PMD,
1209 "The tx_rs_thresh parameter is not "
1210 "used for the 1G driver.");
1211 if (tx_conf->tx_thresh.wthresh == 0)
1212 RTE_LOG(WARNING, PMD,
1213 "To improve 1G driver performance, consider setting "
1214 "the TX WTHRESH value to 4, 8, or 16.");
1216 /* Free memory prior to re-allocation if needed */
1217 if (dev->data->tx_queues[queue_idx] != NULL)
1218 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1220 /* First allocate the tx queue data structure */
1221 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1227 * Allocate TX ring hardware descriptors. A memzone large enough to
1228 * handle the maximum ring size is allocated in order to allow for
1229 * resizing in later calls to the queue setup function.
1231 size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
1232 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
 /* Memzone reservation failed: undo the queue allocation. */
1235 igb_tx_queue_release(txq);
1239 txq->nb_tx_desc = nb_desc;
1240 txq->pthresh = tx_conf->tx_thresh.pthresh;
1241 txq->hthresh = tx_conf->tx_thresh.hthresh;
1242 txq->wthresh = tx_conf->tx_thresh.wthresh;
1243 txq->queue_id = queue_idx;
1244 txq->port_id = dev->data->port_id;
 /* Cache the tail-register address and the ring's phys/virt bases. */
1246 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx));
1247 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1248 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1250 size = sizeof(union e1000_adv_tx_desc) * nb_desc;
1252 /* Allocate software ring */
1253 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1254 sizeof(struct igb_tx_entry) * nb_desc,
1256 if (txq->sw_ring == NULL) {
1257 igb_tx_queue_release(txq);
1260 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1261 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1263 igb_reset_tx_queue(txq, dev);
1264 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1265 dev->data->tx_queues[queue_idx] = txq;
/*
 * Free every mbuf still held in the RX queue's software ring and clear
 * each slot so a later release or reset cannot double-free it.
 */
1271 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1275 if (rxq->sw_ring != NULL) {
1276 for (i = 0; i < rxq->nb_rx_desc; i++) {
1277 if (rxq->sw_ring[i].mbuf != NULL) {
1278 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1279 rxq->sw_ring[i].mbuf = NULL;
/*
 * Release an RX queue: drop all buffered mbufs, then free the software
 * ring (the descriptor memzone itself is retained for reuse by later
 * setups — see the ring_dma_zone_reserve lookup path).
 */
1286 igb_rx_queue_release(struct igb_rx_queue *rxq)
1288 igb_rx_queue_release_mbufs(rxq);
1289 rte_free(rxq->sw_ring);
/*
 * (Re)size the per-device array of RX queue pointers to nb_queues —
 * mirror of igb_dev_tx_queue_alloc. First call zero-allocates the
 * array; later calls release queues beyond the new count (and NULL
 * their slots), realloc the pointer array, and zero any new tail.
 */
1294 igb_dev_rx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1296 uint16_t i, old_nb_queues = dev->data->nb_rx_queues;
1297 struct igb_rx_queue **rxq;
1299 if (dev->data->rx_queues == NULL) {
1300 dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
1301 sizeof(struct igb_rx_queue *) * nb_queues,
1303 if (dev->data->rx_queues == NULL) {
 /* Allocation failed: report zero queues to the caller. */
1304 dev->data->nb_rx_queues = 0;
 /* Shrinking: release excess queues and clear their slots. */
1308 for (i = nb_queues; i < old_nb_queues; i++) {
1309 igb_rx_queue_release(dev->data->rx_queues[i]);
1310 dev->data->rx_queues[i] = NULL;
1312 if (nb_queues != old_nb_queues) {
1313 rxq = rte_realloc(dev->data->rx_queues,
1314 sizeof(struct igb_rx_queue *) * nb_queues,
1319 dev->data->rx_queues = rxq;
 /* Growing: zero the new tail so unset queues read as NULL. */
1320 if (nb_queues > old_nb_queues)
1321 memset(&(rxq[old_nb_queues]), 0,
1322 sizeof(struct igb_rx_queue *) *
1323 (nb_queues - old_nb_queues));
1326 dev->data->nb_rx_queues = nb_queues;
/*
 * Restore an RX queue to its post-setup state: zero the HW descriptor
 * ring byte-by-byte and drop any partially reassembled scattered
 * packet state.
 */
1332 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1337 /* Zero out HW ring memory */
1338 size = sizeof(union e1000_adv_rx_desc) * rxq->nb_rx_desc;
1339 for (i = 0; i < size; i++) {
1340 ((volatile char *)rxq->rx_ring)[i] = 0;
 /* Forget any in-progress multi-segment packet chain. */
1344 rxq->pkt_first_seg = NULL;
1345 rxq->pkt_last_seg = NULL;
/*
 * ethdev rx_queue_setup callback: validate nb_desc, allocate the queue
 * structure, copy the RX thresholds/config, reserve a DMA memzone
 * sized for the maximum ring, allocate the software ring, then reset
 * the queue and publish it in dev->data->rx_queues[queue_idx].
 */
1349 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1352 unsigned int socket_id,
1353 const struct rte_eth_rxconf *rx_conf,
1354 struct rte_mempool *mp)
1356 const struct rte_memzone *rz;
1357 struct igb_rx_queue *rxq;
1358 struct e1000_hw *hw;
1361 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1364 * Validate number of receive descriptors.
1365 * It must not exceed hardware maximum, and must be multiple
1368 if (((nb_desc * sizeof(union e1000_adv_rx_desc)) % IGB_ALIGN) != 0 ||
1369 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1373 /* Free memory prior to re-allocation if needed */
1374 if (dev->data->rx_queues[queue_idx] != NULL) {
1375 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1376 dev->data->rx_queues[queue_idx] = NULL;
1379 /* First allocate the RX queue data structure. */
1380 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
 /* Record ring geometry and the caller's threshold configuration. */
1385 rxq->nb_rx_desc = nb_desc;
1386 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1387 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1388 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1389 rxq->drop_en = rx_conf->rx_drop_en;
1390 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1391 rxq->queue_id = queue_idx;
1392 rxq->port_id = dev->data->port_id;
 /* With HW CRC strip enabled the CRC is not part of the mbuf data. */
1393 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1397 * Allocate RX ring hardware descriptors. A memzone large enough to
1398 * handle the maximum ring size is allocated in order to allow for
1399 * resizing in later calls to the queue setup function.
1401 size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
1402 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
 /* Memzone reservation failed: undo the queue allocation. */
1404 igb_rx_queue_release(rxq);
1407 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx));
1408 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
1409 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1411 /* Allocate software ring. */
1412 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1413 sizeof(struct igb_rx_entry) * nb_desc,
1415 if (rxq->sw_ring == NULL) {
1416 igb_rx_queue_release(rxq);
1419 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1420 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1422 dev->data->rx_queues[queue_idx] = rxq;
1423 igb_reset_rx_queue(rxq);
/*
 * Drain and reset every configured TX and RX queue of the device:
 * free all queued mbufs and return each ring to its post-setup state.
 * Queue structures and descriptor memzones remain allocated.
 */
1429 igb_dev_clear_queues(struct rte_eth_dev *dev)
1432 struct igb_tx_queue *txq;
1433 struct igb_rx_queue *rxq;
1435 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1436 txq = dev->data->tx_queues[i];
1437 igb_tx_queue_release_mbufs(txq);
1438 igb_reset_tx_queue(txq, dev);
1441 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1442 rxq = dev->data->rx_queues[i];
1443 igb_rx_queue_release_mbufs(rxq);
1444 igb_reset_rx_queue(rxq);
1449 * Receive Side Scaling (RSS).
1450 * See section 7.1.1.7 in the following document:
1451 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1454 * The source and destination IP addresses of the IP header and the source and
1455 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1456 * against a configurable random key to compute a 32-bit RSS hash result.
1457 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1458 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1459 * RSS output index which is used as the RX queue index where to store the
1461 * The following output is supplied in the RX write-back descriptor:
1462 * - 32-bit result of the Microsoft RSS hash function,
1463 * - 4-bit RSS type field.
1467 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1468 * Used as the default key.
/* 40 bytes = the 10 x 32-bit RSSRK register writes performed by
 * igb_rss_configure() when the application supplies no key. */
1470 static uint8_t rss_intel_key[40] = {
1471 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1472 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1473 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1474 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1475 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/*
 * Disable RSS by clearing the enable bits of the MRQC register
 * (read-modify-write preserves the remaining MRQC fields).
 */
1479 igb_rss_disable(struct rte_eth_dev *dev)
1481 struct e1000_hw *hw;
1484 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1485 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1486 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1487 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
/*
 * Program receive-side scaling from dev_conf.rx_adv_conf.rss_conf:
 * disable RSS when rss_hf is 0; otherwise load the hash key into the
 * RSSRK registers, spread RX queues round-robin over the 128-entry
 * redirection table (RETA), and enable the hash functions selected by
 * rss_hf in MRQC.
 */
1491 igb_rss_configure(struct rte_eth_dev *dev)
1493 struct e1000_hw *hw;
1501 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1503 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1504 if (rss_hf == 0) /* Disable RSS. */ {
1505 igb_rss_disable(dev);
 /* Fall back to the built-in Intel key when none is supplied. */
1508 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1509 if (hash_key == NULL)
1510 hash_key = rss_intel_key; /* Default hash key. */
1512 /* Fill in RSS hash key. */
1513 for (i = 0; i < 10; i++) {
 /* Pack 4 key bytes little-endian into one 32-bit RSSRK word. */
1514 rss_key = hash_key[(i * 4)];
1515 rss_key |= hash_key[(i * 4) + 1] << 8;
1516 rss_key |= hash_key[(i * 4) + 2] << 16;
1517 rss_key |= hash_key[(i * 4) + 3] << 24;
1518 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1521 /* Fill in redirection table. */
 /* 82575 places the queue index at a different bit offset — hence
  * the shift of 6 there, 0 elsewhere. */
1522 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
1523 for (i = 0; i < 128; i++) {
 /* Round-robin mapping of RETA entries onto configured queues. */
1530 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
1531 i % dev->data->nb_rx_queues : 0);
1532 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
 /* Flush each group of 4 entries as one 32-bit RETA register. */
1534 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
1537 /* Set configured hashing functions in MRQC register. */
1538 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1539 if (rss_hf & ETH_RSS_IPV4)
1540 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1541 if (rss_hf & ETH_RSS_IPV4_TCP)
1542 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1543 if (rss_hf & ETH_RSS_IPV6)
1544 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1545 if (rss_hf & ETH_RSS_IPV6_EX)
1546 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1547 if (rss_hf & ETH_RSS_IPV6_TCP)
1548 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1549 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1550 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1551 if (rss_hf & ETH_RSS_IPV4_UDP)
1552 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1553 if (rss_hf & ETH_RSS_IPV6_UDP)
1554 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1555 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1556 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1557 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1560 /*********************************************************************
1562 * Enable receive unit.
1564 **********************************************************************/
/*
 * Prime an RX queue: allocate one mbuf per descriptor from the queue's
 * mempool and program each descriptor's header/packet addresses with
 * the mbuf's DMA address. On allocation failure the queue is released.
 */
1567 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
1569 struct igb_rx_entry *rxe = rxq->sw_ring;
1573 /* Initialize software ring entries. */
1574 for (i = 0; i < rxq->nb_rx_desc; i++) {
1575 volatile union e1000_adv_rx_desc *rxd;
1576 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
 /* Pool exhausted: log, tear the queue down, and bail out. */
1579 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
1580 "queue_id=%hu\n", rxq->queue_id);
1581 igb_rx_queue_release(rxq);
1585 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
1586 rxd = &rxq->rx_ring[i];
 /* Same buffer for header and packet: no header-split here. */
1587 rxd->read.hdr_addr = dma_addr;
1588 rxd->read.pkt_addr = dma_addr;
/*
 * Bring up the PF receive unit: with RCTL.EN cleared, program jumbo
 * support, per-queue ring base/length, buffer sizes (SRRCTL or RCTL
 * BSIZE), drop-enable and RXDCTL thresholds; select the scalar or
 * scattered RX burst function; configure RSS and checksum offload;
 * apply CRC-strip policy (incl. the i350 per-queue DVMOLR bit); then
 * enable receives and finally set the head/tail pointers.
 */
1596 eth_igb_rx_init(struct rte_eth_dev *dev)
1598 struct e1000_hw *hw;
1599 struct igb_rx_queue *rxq;
1600 struct rte_pktmbuf_pool_private *mbp_priv;
1605 uint16_t rctl_bsize;
1609 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1613 * Make sure receives are disabled while setting
1614 * up the descriptor ring.
1616 rctl = E1000_READ_REG(hw, E1000_RCTL);
1617 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
1620 * Configure support of jumbo frames, if any.
1622 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
1623 rctl |= E1000_RCTL_LPE;
1625 /* Set maximum packet length. */
1626 E1000_WRITE_REG(hw, E1000_RLPML,
1627 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1629 rctl &= ~E1000_RCTL_LPE;
1631 /* Configure and enable each RX queue. */
1633 dev->rx_pkt_burst = eth_igb_recv_pkts;
1634 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1638 rxq = dev->data->rx_queues[i];
1640 /* Allocate buffers for descriptor rings and set up queue */
1641 ret = igb_alloc_rx_queue_mbufs(rxq);
 /* Priming one queue failed: unwind all queues. */
1643 igb_dev_clear_queues(dev);
1648 * Reset crc_len in case it was changed after queue setup by a
1652 (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
 /* Program ring length and 64-bit base address into the NIC. */
1655 bus_addr = rxq->rx_ring_phys_addr;
1656 E1000_WRITE_REG(hw, E1000_RDLEN(i),
1658 sizeof(union e1000_adv_rx_desc));
1659 E1000_WRITE_REG(hw, E1000_RDBAH(i),
1660 (uint32_t)(bus_addr >> 32));
1661 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
1663 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1666 * Configure RX buffer size.
1668 mbp_priv = (struct rte_pktmbuf_pool_private *)
1669 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1670 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1671 RTE_PKTMBUF_HEADROOM);
1672 if (buf_size >= 1024) {
1674 * Configure the BSIZEPACKET field of the SRRCTL
1675 * register of the queue.
1676 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1677 * If this field is equal to 0b, then RCTL.BSIZE
1678 * determines the RX packet buffer size.
1680 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1681 E1000_SRRCTL_BSIZEPKT_MASK);
 /* Round buf_size down to what the register actually encodes. */
1682 buf_size = (uint16_t) ((srrctl &
1683 E1000_SRRCTL_BSIZEPKT_MASK) <<
1684 E1000_SRRCTL_BSIZEPKT_SHIFT);
 /* Frame larger than one buffer: switch to scattered RX. */
1686 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
1687 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1688 dev->data->scattered_rx = 1;
1692 * Use BSIZE field of the device RCTL register.
 /* RCTL.BSIZE is device-global: keep the minimum over queues. */
1694 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1695 rctl_bsize = buf_size;
1696 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1697 dev->data->scattered_rx = 1;
1700 /* Set if packets are dropped when no descriptors available */
1702 srrctl |= E1000_SRRCTL_DROP_EN;
1704 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
1706 /* Enable this RX queue. */
1707 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
1708 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
 /* Clear the threshold fields before setting p/h/wthresh. */
1709 rxdctl &= 0xFFF00000;
1710 rxdctl |= (rxq->pthresh & 0x1F);
1711 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
1712 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
1713 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
1717 * Setup BSIZE field of RCTL register, if needed.
1718 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1719 * register, since the code above configures the SRRCTL register of
1720 * the RX queue in such a case.
1721 * All configurable sizes are:
1722 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
1723 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
1724 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
1725 * 2048: rctl |= E1000_RCTL_SZ_2048;
1726 * 1024: rctl |= E1000_RCTL_SZ_1024;
1727 * 512: rctl |= E1000_RCTL_SZ_512;
1728 * 256: rctl |= E1000_RCTL_SZ_256;
1730 if (rctl_bsize > 0) {
1731 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1732 rctl |= E1000_RCTL_SZ_512;
1733 else /* 256 <= buf_size < 512 - use 256 */
1734 rctl |= E1000_RCTL_SZ_256;
1738 * Configure RSS if device configured with multiple RX queues.
1740 if (dev->data->nb_rx_queues > 1)
1741 igb_rss_configure(dev);
1743 igb_rss_disable(dev);
1746 * Setup the Checksum Register.
1747 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1749 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
1750 rxcsum |= E1000_RXCSUM_PCSD;
1752 /* Enable both L3/L4 rx checksum offload */
1753 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
1754 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1756 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1757 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
1759 /* Setup the Receive Control Register. */
1760 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1761 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
1763 /* set STRCRC bit in all queues for Powerville */
1764 if (hw->mac.type == e1000_i350) {
1765 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1766 uint32_t dvmolr = E1000_READ_REG(hw, E1000_DVMOLR(i));
1767 dvmolr |= E1000_DVMOLR_STRCRC;
1768 E1000_WRITE_REG(hw, E1000_DVMOLR(i), dvmolr);
1773 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1775 /* clear STRCRC bit in all queues for Powerville */
1776 if (hw->mac.type == e1000_i350) {
1777 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1778 uint32_t dvmolr = E1000_READ_REG(hw, E1000_DVMOLR(i));
1779 dvmolr &= ~E1000_DVMOLR_STRCRC;
1780 E1000_WRITE_REG(hw, E1000_DVMOLR(i), dvmolr);
 /* Multicast-offset field: clear then set from mc_filter_type. */
1785 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
1786 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
1787 E1000_RCTL_RDMTS_HALF |
1788 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
1790 /* Make sure VLAN Filters are off. */
1791 rctl &= ~E1000_RCTL_VFE;
1792 /* Don't store bad packets. */
1793 rctl &= ~E1000_RCTL_SBP;
1795 /* Enable Receives. */
1796 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
1799 * Setup the HW Rx Head and Tail Descriptor Pointers.
1800 * This needs to be done after enable.
1802 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1803 rxq = dev->data->rx_queues[i];
1804 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
 /* Tail = last descriptor: hand all but one slot to hardware. */
1805 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
1811 /*********************************************************************
1813 * Enable transmit unit.
1815 **********************************************************************/
/*
 * Bring up the PF transmit unit: for each TX queue program the ring
 * base/length, zero head and tail, set the TXDCTL thresholds and the
 * queue-enable bit; then configure collision handling and enable the
 * transmitter via TCTL.
 */
1817 eth_igb_tx_init(struct rte_eth_dev *dev)
1819 struct e1000_hw *hw;
1820 struct igb_tx_queue *txq;
1825 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1827 /* Setup the Base and Length of the Tx Descriptor Rings. */
1828 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1830 txq = dev->data->tx_queues[i];
1831 bus_addr = txq->tx_ring_phys_addr;
1833 E1000_WRITE_REG(hw, E1000_TDLEN(i),
1835 sizeof(union e1000_adv_tx_desc));
1836 E1000_WRITE_REG(hw, E1000_TDBAH(i),
1837 (uint32_t)(bus_addr >> 32));
1838 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
1840 /* Setup the HW Tx Head and Tail descriptor pointers. */
1841 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
1842 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
1844 /* Setup Transmit threshold registers. */
1845 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
1846 txdctl |= txq->pthresh & 0x1F;
1847 txdctl |= ((txq->hthresh & 0x1F) << 8);
1848 txdctl |= ((txq->wthresh & 0x1F) << 16);
1849 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
1850 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
1853 /* Program the Transmit Control Register. */
1854 tctl = E1000_READ_REG(hw, E1000_TCTL);
1855 tctl &= ~E1000_TCTL_CT;
1856 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
1857 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
1859 e1000_config_collision_dist(hw);
1861 /* This write will effectively turn on the transmit unit. */
1862 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
1865 /*********************************************************************
1867 * Enable VF receive unit.
1869 **********************************************************************/
/*
 * VF variant of eth_igb_rx_init: program per-queue ring base/length,
 * SRRCTL buffer size, drop-enable and RXDCTL thresholds, choosing the
 * scalar or scattered RX burst function as needed, then set head/tail
 * pointers. Unlike the PF path there is no RCTL/RSS/checksum/CRC
 * programming here — those registers are owned by the PF.
 */
1871 eth_igbvf_rx_init(struct rte_eth_dev *dev)
1873 struct e1000_hw *hw;
1874 struct igb_rx_queue *rxq;
1875 struct rte_pktmbuf_pool_private *mbp_priv;
1878 uint16_t rctl_bsize;
1882 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1884 /* Configure and enable each RX queue. */
1886 dev->rx_pkt_burst = eth_igb_recv_pkts;
1887 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1891 rxq = dev->data->rx_queues[i];
1893 /* Allocate buffers for descriptor rings and set up queue */
1894 ret = igb_alloc_rx_queue_mbufs(rxq);
 /* Program ring length and 64-bit base address into the NIC. */
1898 bus_addr = rxq->rx_ring_phys_addr;
1899 E1000_WRITE_REG(hw, E1000_RDLEN(i),
1901 sizeof(union e1000_adv_rx_desc));
1902 E1000_WRITE_REG(hw, E1000_RDBAH(i),
1903 (uint32_t)(bus_addr >> 32));
1904 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
1906 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1909 * Configure RX buffer size.
1911 mbp_priv = (struct rte_pktmbuf_pool_private *)
1912 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1913 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1914 RTE_PKTMBUF_HEADROOM);
1915 if (buf_size >= 1024) {
1917 * Configure the BSIZEPACKET field of the SRRCTL
1918 * register of the queue.
1919 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1920 * If this field is equal to 0b, then RCTL.BSIZE
1921 * determines the RX packet buffer size.
1923 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1924 E1000_SRRCTL_BSIZEPKT_MASK);
 /* Round buf_size down to what the register actually encodes. */
1925 buf_size = (uint16_t) ((srrctl &
1926 E1000_SRRCTL_BSIZEPKT_MASK) <<
1927 E1000_SRRCTL_BSIZEPKT_SHIFT);
 /* Frame larger than one buffer: switch to scattered RX. */
1929 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
1930 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1931 dev->data->scattered_rx = 1;
1935 * Use BSIZE field of the device RCTL register.
1937 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1938 rctl_bsize = buf_size;
1939 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1940 dev->data->scattered_rx = 1;
1943 /* Set if packets are dropped when no descriptors available */
1945 srrctl |= E1000_SRRCTL_DROP_EN;
1947 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
1949 /* Enable this RX queue. */
1950 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
1951 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
 /* Clear the threshold fields before setting p/h/wthresh. */
1952 rxdctl &= 0xFFF00000;
1953 rxdctl |= (rxq->pthresh & 0x1F);
1954 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
1955 if (hw->mac.type == e1000_82576) {
1957 * Workaround of 82576 VF Erratum
1958 * force set WTHRESH to 1
1959 * to avoid Write-Back not triggered sometimes
1962 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !\n");
1965 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
1966 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
1970 * Setup the HW Rx Head and Tail Descriptor Pointers.
1971 * This needs to be done after enable.
1973 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1974 rxq = dev->data->rx_queues[i];
1975 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
 /* Tail = last descriptor: hand all but one slot to hardware. */
1976 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
1982 /*********************************************************************
1984 * Enable VF transmit unit.
1986 **********************************************************************/
1988 eth_igbvf_tx_init(struct rte_eth_dev *dev)
1990 struct e1000_hw *hw;
1991 struct igb_tx_queue *txq;
1995 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1997 /* Setup the Base and Length of the Tx Descriptor Rings. */
1998 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2001 txq = dev->data->tx_queues[i];
2002 bus_addr = txq->tx_ring_phys_addr;
2003 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2005 sizeof(union e1000_adv_tx_desc));
2006 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2007 (uint32_t)(bus_addr >> 32));
2008 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2010 /* Setup the HW Tx Head and Tail descriptor pointers. */
2011 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2012 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2014 /* Setup Transmit threshold registers. */
2015 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2016 txdctl |= txq->pthresh & 0x1F;
2017 txdctl |= ((txq->hthresh & 0x1F) << 8);
2018 if (hw->mac.type == e1000_82576) {
2020 * Workaround of 82576 VF Erratum
2021 * force set WTHRESH to 1
2022 * to avoid Write-Back not triggered sometimes
2025 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !\n");
2028 txdctl |= ((txq->wthresh & 0x1F) << 16);
2029 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2030 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);