4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_interrupts.h>
47 #include <rte_byteorder.h>
48 #include <rte_common.h>
50 #include <rte_debug.h>
52 #include <rte_memory.h>
53 #include <rte_memcpy.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_tailq.h>
58 #include <rte_per_lcore.h>
59 #include <rte_lcore.h>
60 #include <rte_atomic.h>
61 #include <rte_branch_prediction.h>
63 #include <rte_mempool.h>
64 #include <rte_malloc.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_prefetch.h>
72 #include <rte_string_fns.h>
74 #include "e1000_logs.h"
75 #include "e1000/e1000_api.h"
76 #include "e1000_ethdev.h"
78 static inline struct rte_mbuf *
79 rte_rxmbuf_alloc(struct rte_mempool *mp)
83 m = __rte_mbuf_raw_alloc(mp);
84 __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
88 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
89 (uint64_t) ((mb)->buf_physaddr + \
90 (uint64_t) ((char *)((mb)->pkt.data) - \
91 (char *)(mb)->buf_addr))
93 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
94 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
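/*
 * The two macros above compute the bus address programmed into RX/TX
 * descriptors: buf_physaddr is the physical address of the start of the
 * mbuf buffer, to which either the offset of the current data pointer or
 * the default headroom is added.
 */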
97 * Structure associated with each descriptor of the RX ring of a RX queue.
100 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
104 * Structure associated with each descriptor of the TX ring of a TX queue.
106 struct igb_tx_entry {
107 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
108 uint16_t next_id; /**< Index of next descriptor in ring. */
109 uint16_t last_id; /**< Index of last scattered descriptor. */
113 * Structure associated with each RX queue.
115 struct igb_rx_queue {
116 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
117 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
118 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
119 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
120 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
121 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
122 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
123 uint16_t nb_rx_desc; /**< number of RX descriptors. */
124 uint16_t rx_tail; /**< Index of the next RX descriptor to read. */
125 uint16_t nb_rx_hold; /**< number of held free RX desc. */
126 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
127 uint16_t queue_id; /**< RX queue index. */
128 uint8_t port_id; /**< Device port identifier. */
129 uint8_t pthresh; /**< Prefetch threshold register. */
130 uint8_t hthresh; /**< Host threshold register. */
131 uint8_t wthresh; /**< Write-back threshold register. */
132 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
133 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
137 * Hardware context number
139 enum igb_advctx_num {
140 IGB_CTX_0 = 0, /**< CTX0 */
141 IGB_CTX_1 = 1, /**< CTX1 */
142 IGB_CTX_NUM = 2, /**< CTX_NUM */
146 * Structure used to check whether a new context descriptor needs to be built.
148 struct igb_advctx_info {
149 uint16_t flags; /**< ol_flags related to context build. */
150 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
151 union rte_vlan_macip vlan_macip_lens; /**< vlan, mac & ip length. */
155 * Structure associated with each TX queue.
157 struct igb_tx_queue {
158 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
159 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
160 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
161 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
162 uint32_t txd_type; /**< Device-specific TXD type */
163 uint16_t nb_tx_desc; /**< number of TX descriptors. */
164 uint16_t tx_tail; /**< Current value of TDT register. */
166 /**< Index of first used TX descriptor. */
167 uint16_t queue_id; /**< TX queue index. */
168 uint8_t port_id; /**< Device port identifier. */
169 uint8_t pthresh; /**< Prefetch threshold register. */
170 uint8_t hthresh; /**< Host threshold register. */
171 uint8_t wthresh; /**< Write-back threshold register. */
173 /**< Index of the currently used hardware context. */
175 /**< Start context position for transmit queue. */
176 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
177 /**< Hardware context history.*/
181 #define RTE_PMD_USE_PREFETCH
184 #ifdef RTE_PMD_USE_PREFETCH
185 #define rte_igb_prefetch(p) rte_prefetch0(p)
187 #define rte_igb_prefetch(p) do {} while(0)
190 #ifdef RTE_PMD_PACKET_PREFETCH
191 #define rte_packet_prefetch(p) rte_prefetch1(p)
193 #define rte_packet_prefetch(p) do {} while(0)
196 /*********************************************************************
200 **********************************************************************/
203 * Advanced context descriptors are almost the same between igb and ixgbe.
204 * This is kept as a separate function to leave room for optimization;
205 * rework is required to switch to pre-defined values.
209 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
210 volatile struct e1000_adv_tx_context_desc *ctx_txd,
211 uint16_t ol_flags, uint32_t vlan_macip_lens)
213 uint32_t type_tucmd_mlhl;
214 uint32_t mss_l4len_idx;
215 uint32_t ctx_idx, ctx_curr;
218 ctx_curr = txq->ctx_curr;
219 ctx_idx = ctx_curr + txq->ctx_start;
224 if (ol_flags & PKT_TX_VLAN_PKT) {
225 cmp_mask |= TX_VLAN_CMP_MASK;
228 if (ol_flags & PKT_TX_IP_CKSUM) {
229 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
230 cmp_mask |= TX_MAC_LEN_CMP_MASK;
233 /* Specify which HW CTX to upload. */
234 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
235 switch (ol_flags & PKT_TX_L4_MASK) {
236 case PKT_TX_UDP_CKSUM:
237 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
238 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
239 mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
240 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
242 case PKT_TX_TCP_CKSUM:
243 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
244 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
245 mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
246 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
248 case PKT_TX_SCTP_CKSUM:
249 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
250 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
251 mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
252 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
255 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
256 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
260 txq->ctx_cache[ctx_curr].flags = ol_flags;
261 txq->ctx_cache[ctx_curr].cmp_mask = cmp_mask;
262 txq->ctx_cache[ctx_curr].vlan_macip_lens.data =
263 vlan_macip_lens & cmp_mask;
265 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
266 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
267 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
268 ctx_txd->seqnum_seed = 0;
272 * Check which hardware context can be used. Use the existing match
273 * or create a new context descriptor.
275 static inline uint32_t
276 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
277 uint32_t vlan_macip_lens)
279 /* If match with the current context */
280 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
281 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
282 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
283 return txq->ctx_curr;
286 /* If match with the second context */
288 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
289 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
290 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
291 return txq->ctx_curr;
294 /* Mismatch: no cached context matches, a new context descriptor must be built */
295 return (IGB_CTX_NUM);
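/*
 * Translate the checksum-offload request flags of an mbuf into the POPTS
 * bits (IXSM for IP checksum, TXSM for L4 checksum) of the olinfo_status
 * field of a TX data descriptor.
 */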
298 static inline uint32_t
299 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
301 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
302 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
305 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
306 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
310 static inline uint32_t
311 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
313 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
314 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
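/*
 * TX burst function. For each packet: decide whether a new context
 * descriptor is needed for the requested offloads, check that enough free
 * descriptors are available (the descriptor that would become the packet's
 * last one must already have its DD bit written back), then fill one data
 * descriptor per segment and finally bump the TDT register once for the
 * whole burst.
 */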
318 eth_igb_xmit_pkts(struct igb_tx_queue *txq, struct rte_mbuf **tx_pkts,
321 struct igb_tx_entry *sw_ring;
322 struct igb_tx_entry *txe, *txn;
323 volatile union e1000_adv_tx_desc *txr;
324 volatile union e1000_adv_tx_desc *txd;
325 struct rte_mbuf *tx_pkt;
326 struct rte_mbuf *m_seg;
327 uint64_t buf_dma_addr;
328 uint32_t olinfo_status;
329 uint32_t cmd_type_len;
340 uint32_t vlan_macip_lens;
342 sw_ring = txq->sw_ring;
344 tx_id = txq->tx_tail;
345 txe = &sw_ring[tx_id];
347 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
349 pkt_len = tx_pkt->pkt.pkt_len;
351 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
354 * The number of descriptors that must be allocated for a
355 * packet is the number of segments of that packet, plus 1
356 * Context Descriptor for the VLAN Tag Identifier, if any.
357 * Determine the last TX descriptor to allocate in the TX ring
358 * for the packet, starting from the current position (tx_id)
361 tx_last = (uint16_t) (tx_id + tx_pkt->pkt.nb_segs - 1);
363 ol_flags = tx_pkt->ol_flags;
364 vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
365 tx_ol_req = (ol_flags & PKT_TX_OFFLOAD_MASK);
367 /* Check whether a context descriptor needs to be built. */
369 ctx = what_advctx_update(txq, tx_ol_req,
371 /* Only allocate a context descriptor if required. */
372 new_ctx = (ctx == IGB_CTX_NUM);
374 tx_last = (uint16_t) (tx_last + new_ctx);
376 if (tx_last >= txq->nb_tx_desc)
377 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
379 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
380 " tx_first=%u tx_last=%u\n",
381 (unsigned) txq->port_id,
382 (unsigned) txq->queue_id,
388 * Check if there are enough free descriptors in the TX ring
389 * to transmit the next packet.
390 * This operation is based on the two following rules:
392 * 1- Only check that the last needed TX descriptor can be
393 * allocated (by construction, if that descriptor is free,
394 * all intermediate ones are also free).
396 * For this purpose, the index of the last TX descriptor
397 * used for a packet (the "last descriptor" of a packet)
398 * is recorded in the TX entries (the last one included)
399 * that are associated with all TX descriptors allocated
402 * 2- Avoid allocating the last free TX descriptor of the
403 * ring, so that the TDT register is never set to the
404 * same value stored in parallel by the NIC in the TDH
405 * register, which would make the TX engine of the NIC
406 * enter a deadlock situation.
408 * By extension, avoid allocating a free descriptor that
409 * belongs to the last set of free descriptors allocated
410 * for the previously transmitted packet.
414 * The "last descriptor" of the previously sent packet, if any,
415 * which used the last descriptor to allocate.
417 tx_end = sw_ring[tx_last].last_id;
420 * The next descriptor following that "last descriptor" in the
423 tx_end = sw_ring[tx_end].next_id;
426 * The "last descriptor" associated with that next descriptor.
428 tx_end = sw_ring[tx_end].last_id;
431 * Check that this descriptor is free.
433 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
440 * Set common flags of all TX Data Descriptors.
442 * The following bits must be set in all Data Descriptors:
443 * - E1000_ADVTXD_DTYP_DATA
444 * - E1000_ADVTXD_DCMD_DEXT
446 * The following bits must be set in the first Data Descriptor
447 * and are ignored in the other ones:
448 * - E1000_ADVTXD_DCMD_IFCS
449 * - E1000_ADVTXD_MAC_1588
450 * - E1000_ADVTXD_DCMD_VLE
452 * The following bits must only be set in the last Data
454 * - E1000_TXD_CMD_EOP
456 * The following bits can be set in any Data Descriptor, but
457 * are only set in the last Data Descriptor:
460 cmd_type_len = txq->txd_type |
461 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
462 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
463 #if defined(RTE_LIBRTE_IEEE1588)
464 if (ol_flags & PKT_TX_IEEE1588_TMST)
465 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
468 /* Setup TX Advanced context descriptor if required */
470 volatile struct e1000_adv_tx_context_desc *
473 ctx_txd = (volatile struct
474 e1000_adv_tx_context_desc *)
477 txn = &sw_ring[txe->next_id];
478 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
480 if (txe->mbuf != NULL) {
481 rte_pktmbuf_free_seg(txe->mbuf);
485 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
488 txe->last_id = tx_last;
489 tx_id = txe->next_id;
493 /* Setup the TX Advanced Data Descriptor */
494 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
495 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
496 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
501 txn = &sw_ring[txe->next_id];
504 if (txe->mbuf != NULL)
505 rte_pktmbuf_free_seg(txe->mbuf);
509 * Set up transmit descriptor.
511 slen = (uint16_t) m_seg->pkt.data_len;
512 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
513 txd->read.buffer_addr =
514 rte_cpu_to_le_64(buf_dma_addr);
515 txd->read.cmd_type_len =
516 rte_cpu_to_le_32(cmd_type_len | slen);
517 txd->read.olinfo_status =
518 rte_cpu_to_le_32(olinfo_status);
519 txe->last_id = tx_last;
520 tx_id = txe->next_id;
522 m_seg = m_seg->pkt.next;
523 } while (m_seg != NULL);
526 * The last packet data descriptor needs End Of Packet (EOP)
527 * and Report Status (RS).
529 txd->read.cmd_type_len |=
530 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
536 * Set the Transmit Descriptor Tail (TDT).
538 E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
539 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
540 (unsigned) txq->port_id, (unsigned) txq->queue_id,
541 (unsigned) tx_id, (unsigned) nb_tx);
542 txq->tx_tail = tx_id;
547 /*********************************************************************
551 **********************************************************************/
552 static inline uint16_t
553 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
557 static uint16_t ip_pkt_types_map[16] = {
558 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
559 PKT_RX_IPV6_HDR, 0, 0, 0,
560 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
561 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
564 #if defined(RTE_LIBRTE_IEEE1588)
565 static uint32_t ip_pkt_etqf_map[8] = {
566 0, 0, 0, PKT_RX_IEEE1588_PTP,
570 pkt_flags = (uint16_t) (hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ?
571 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
572 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
574 pkt_flags = (uint16_t) (hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ? 0 :
575 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F];
577 return pkt_flags | (uint16_t) (((hl_tp_rs & 0x0F) == 0) ? 0 :
581 static inline uint16_t
582 rx_desc_status_to_pkt_flags(uint32_t rx_status)
586 /* Check if VLAN present */
587 pkt_flags = (uint16_t) (rx_status & E1000_RXD_STAT_VP) ? PKT_RX_VLAN_PKT : 0;
589 #if defined(RTE_LIBRTE_IEEE1588)
590 if (rx_status & E1000_RXD_STAT_TMST)
591 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
596 static inline uint16_t
597 rx_desc_error_to_pkt_flags(uint32_t rx_status)
600 * Bit 30: IPE, IPv4 checksum error
601 * Bit 29: L4I, L4 integrity error
604 static uint16_t error_to_pkt_flags_map[4] = {
605 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
606 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
608 return error_to_pkt_flags_map[(rx_status >>
609 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
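/*
 * RX burst function (one mbuf per packet, no scattering). Descriptors whose
 * DD bit is set are read, their mbufs are handed to the caller and
 * immediately replaced with freshly allocated ones; the RDT register is
 * only advanced once more than rx_free_thresh descriptors have been
 * replenished.
 */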
613 eth_igb_recv_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
616 volatile union e1000_adv_rx_desc *rx_ring;
617 volatile union e1000_adv_rx_desc *rxdp;
618 struct igb_rx_entry *sw_ring;
619 struct igb_rx_entry *rxe;
620 struct rte_mbuf *rxm;
621 struct rte_mbuf *nmb;
622 union e1000_adv_rx_desc rxd;
625 uint32_t hlen_type_rss;
634 rx_id = rxq->rx_tail;
635 rx_ring = rxq->rx_ring;
636 sw_ring = rxq->sw_ring;
637 while (nb_rx < nb_pkts) {
639 * The order of operations here is important as the DD status
640 * bit must not be read after any other descriptor fields.
641 * rx_ring and rxdp are pointing to volatile data so the order
642 * of accesses cannot be reordered by the compiler. If they were
643 * not volatile, they could be reordered which could lead to
644 * using invalid descriptor fields when read from rxd.
646 rxdp = &rx_ring[rx_id];
647 staterr = rxdp->wb.upper.status_error;
648 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
655 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
656 * likely to be invalid and to be dropped by the various
657 * validation checks performed by the network stack.
659 * Allocate a new mbuf to replenish the RX ring descriptor.
660 * If the allocation fails:
661 * - arrange for that RX descriptor to be the first one
662 * being parsed the next time the receive function is
663 * invoked [on the same queue].
665 * - Stop parsing the RX ring and return immediately.
667 * This policy does not drop the packet received in the RX
668 * descriptor for which the allocation of a new mbuf failed.
669 * Thus, it allows that packet to be retrieved later, once
670 * mbufs have been freed in the meantime.
671 * As a side effect, holding RX descriptors instead of
672 * systematically giving them back to the NIC may lead to
673 * RX ring exhaustion situations.
674 * However, the NIC can gracefully prevent such situations
675 * from happening by sending specific "back-pressure" flow
676 * control frames to its peer(s).
678 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
679 "staterr=0x%x pkt_len=%u\n",
680 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
681 (unsigned) rx_id, (unsigned) staterr,
682 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
684 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
686 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
687 "queue_id=%u\n", (unsigned) rxq->port_id,
688 (unsigned) rxq->queue_id);
689 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
694 rxe = &sw_ring[rx_id];
696 if (rx_id == rxq->nb_rx_desc)
699 /* Prefetch next mbuf while processing current one. */
700 rte_igb_prefetch(sw_ring[rx_id].mbuf);
703 * When the next RX descriptor is on a cache-line boundary,
704 * prefetch the next 4 RX descriptors and the next 8 pointers
707 if ((rx_id & 0x3) == 0) {
708 rte_igb_prefetch(&rx_ring[rx_id]);
709 rte_igb_prefetch(&sw_ring[rx_id]);
715 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
716 rxdp->read.hdr_addr = dma_addr;
717 rxdp->read.pkt_addr = dma_addr;
720 * Initialize the returned mbuf.
721 * 1) setup generic mbuf fields:
722 * - number of segments,
725 * - RX port identifier.
726 * 2) integrate hardware offload data, if any:
728 * - IP checksum flag,
729 * - VLAN TCI, if any,
732 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
734 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
735 rte_packet_prefetch(rxm->pkt.data);
736 rxm->pkt.nb_segs = 1;
737 rxm->pkt.next = NULL;
738 rxm->pkt.pkt_len = pkt_len;
739 rxm->pkt.data_len = pkt_len;
740 rxm->pkt.in_port = rxq->port_id;
742 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
743 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
744 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
745 rxm->pkt.vlan_macip.f.vlan_tci =
746 rte_le_to_cpu_16(rxd.wb.upper.vlan);
748 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
749 pkt_flags = (pkt_flags |
750 rx_desc_status_to_pkt_flags(staterr));
751 pkt_flags = (pkt_flags |
752 rx_desc_error_to_pkt_flags(staterr));
753 rxm->ol_flags = pkt_flags;
756 * Store the mbuf address into the next entry of the array
757 * of returned packets.
759 rx_pkts[nb_rx++] = rxm;
761 rxq->rx_tail = rx_id;
764 * If the number of free RX descriptors is greater than the RX free
765 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
767 * Update the RDT with the value of the last processed RX descriptor
768 * minus 1, to guarantee that the RDT register is never equal to the
769 * RDH register, which creates a "full" ring situation from the
770 * hardware point of view...
772 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
773 if (nb_hold > rxq->rx_free_thresh) {
774 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
775 "nb_hold=%u nb_rx=%u\n",
776 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
777 (unsigned) rx_id, (unsigned) nb_hold,
779 rx_id = (uint16_t) ((rx_id == 0) ?
780 (rxq->nb_rx_desc - 1) : (rx_id - 1));
781 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
784 rxq->nb_rx_hold = nb_hold;
789 eth_igb_recv_scattered_pkts(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,
792 volatile union e1000_adv_rx_desc *rx_ring;
793 volatile union e1000_adv_rx_desc *rxdp;
794 struct igb_rx_entry *sw_ring;
795 struct igb_rx_entry *rxe;
796 struct rte_mbuf *first_seg;
797 struct rte_mbuf *last_seg;
798 struct rte_mbuf *rxm;
799 struct rte_mbuf *nmb;
800 union e1000_adv_rx_desc rxd;
801 uint64_t dma; /* Physical address of mbuf data buffer */
803 uint32_t hlen_type_rss;
812 rx_id = rxq->rx_tail;
813 rx_ring = rxq->rx_ring;
814 sw_ring = rxq->sw_ring;
817 * Retrieve RX context of current packet, if any.
819 first_seg = rxq->pkt_first_seg;
820 last_seg = rxq->pkt_last_seg;
822 while (nb_rx < nb_pkts) {
825 * The order of operations here is important as the DD status
826 * bit must not be read after any other descriptor fields.
827 * rx_ring and rxdp are pointing to volatile data so the order
828 * of accesses cannot be reordered by the compiler. If they were
829 * not volatile, they could be reordered which could lead to
830 * using invalid descriptor fields when read from rxd.
832 rxdp = &rx_ring[rx_id];
833 staterr = rxdp->wb.upper.status_error;
834 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
841 * Allocate a new mbuf to replenish the RX ring descriptor.
842 * If the allocation fails:
843 * - arrange for that RX descriptor to be the first one
844 * being parsed the next time the receive function is
845 * invoked [on the same queue].
847 * - Stop parsing the RX ring and return immediately.
849 * This policy does not drop the packet received in the RX
850 * descriptor for which the allocation of a new mbuf failed.
851 * Thus, it allows that packet to be retrieved later, once
852 * mbufs have been freed in the meantime.
853 * As a side effect, holding RX descriptors instead of
854 * systematically giving them back to the NIC may lead to
855 * RX ring exhaustion situations.
856 * However, the NIC can gracefully prevent such situations
857 * from happening by sending specific "back-pressure" flow
858 * control frames to its peer(s).
860 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
861 "staterr=0x%x data_len=%u\n",
862 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
863 (unsigned) rx_id, (unsigned) staterr,
864 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
866 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
868 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
869 "queue_id=%u\n", (unsigned) rxq->port_id,
870 (unsigned) rxq->queue_id);
871 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
876 rxe = &sw_ring[rx_id];
878 if (rx_id == rxq->nb_rx_desc)
881 /* Prefetch next mbuf while processing current one. */
882 rte_igb_prefetch(sw_ring[rx_id].mbuf);
885 * When the next RX descriptor is on a cache-line boundary,
886 * prefetch the next 4 RX descriptors and the next 8 pointers
889 if ((rx_id & 0x3) == 0) {
890 rte_igb_prefetch(&rx_ring[rx_id]);
891 rte_igb_prefetch(&sw_ring[rx_id]);
895 * Update RX descriptor with the physical address of the new
896 * data buffer of the new allocated mbuf.
900 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
901 rxdp->read.pkt_addr = dma;
902 rxdp->read.hdr_addr = dma;
905 * Set data length & data buffer address of mbuf.
907 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
908 rxm->pkt.data_len = data_len;
909 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
912 * If this is the first buffer of the received packet,
913 * set the pointer to the first mbuf of the packet and
914 * initialize its context.
915 * Otherwise, update the total length and the number of segments
916 * of the current scattered packet, and update the pointer to
917 * the last mbuf of the current packet.
919 if (first_seg == NULL) {
921 first_seg->pkt.pkt_len = data_len;
922 first_seg->pkt.nb_segs = 1;
924 first_seg->pkt.pkt_len += data_len;
925 first_seg->pkt.nb_segs++;
926 last_seg->pkt.next = rxm;
930 * If this is not the last buffer of the received packet,
931 * update the pointer to the last mbuf of the current scattered
932 * packet and continue to parse the RX ring.
934 if (! (staterr & E1000_RXD_STAT_EOP)) {
940 * This is the last buffer of the received packet.
941 * If the CRC is not stripped by the hardware:
942 * - Subtract the CRC length from the total packet length.
943 * - If the last buffer only contains the whole CRC or a part
944 * of it, free the mbuf associated to the last buffer.
945 * If part of the CRC is also contained in the previous
946 * mbuf, subtract the length of that CRC part from the
947 * data length of the previous mbuf.
949 rxm->pkt.next = NULL;
950 if (unlikely(rxq->crc_len > 0)) {
951 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
952 if (data_len <= ETHER_CRC_LEN) {
953 rte_pktmbuf_free_seg(rxm);
954 first_seg->pkt.nb_segs--;
955 last_seg->pkt.data_len = (uint16_t)
956 (last_seg->pkt.data_len -
957 (ETHER_CRC_LEN - data_len));
958 last_seg->pkt.next = NULL;
961 (uint16_t) (data_len - ETHER_CRC_LEN);
965 * Initialize the first mbuf of the returned packet:
966 * - RX port identifier,
967 * - hardware offload data, if any:
969 * - IP checksum flag,
970 * - VLAN TCI, if any,
973 first_seg->pkt.in_port = rxq->port_id;
974 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
977 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
978 * set in the pkt_flags field.
980 first_seg->pkt.vlan_macip.f.vlan_tci =
981 rte_le_to_cpu_16(rxd.wb.upper.vlan);
982 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
983 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
984 pkt_flags = (pkt_flags | rx_desc_status_to_pkt_flags(staterr));
985 pkt_flags = (pkt_flags | rx_desc_error_to_pkt_flags(staterr));
986 first_seg->ol_flags = pkt_flags;
988 /* Prefetch data of first segment, if configured to do so. */
989 rte_packet_prefetch(first_seg->pkt.data);
992 * Store the mbuf address into the next entry of the array
993 * of returned packets.
995 rx_pkts[nb_rx++] = first_seg;
998 * Set up the receive context for a new packet.
1004 * Record index of the next RX descriptor to probe.
1006 rxq->rx_tail = rx_id;
1009 * Save receive context.
1011 rxq->pkt_first_seg = first_seg;
1012 rxq->pkt_last_seg = last_seg;
1015 * If the number of free RX descriptors is greater than the RX free
1016 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1018 * Update the RDT with the value of the last processed RX descriptor
1019 * minus 1, to guarantee that the RDT register is never equal to the
1020 * RDH register, which creates a "full" ring situation from the
1021 * hardware point of view...
1023 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1024 if (nb_hold > rxq->rx_free_thresh) {
1025 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1026 "nb_hold=%u nb_rx=%u\n",
1027 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1028 (unsigned) rx_id, (unsigned) nb_hold,
1030 rx_id = (uint16_t) ((rx_id == 0) ?
1031 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1032 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1035 rxq->nb_rx_hold = nb_hold;
1040 * Rings setup and release.
1042 * TDBA/RDBA should be aligned on a 16-byte boundary, but TDLEN/RDLEN must be
1043 * a multiple of 128 bytes, so we align TDBA/RDBA on a 128-byte boundary.
1044 * This also improves the cache-line behaviour of the rings.
1045 * The hardware supports cache-line sizes of up to 128 bytes.
1047 #define IGB_ALIGN 128
1050 * Maximum number of Ring Descriptors.
1052 * Since RDLEN/TDLEN must be a multiple of 128 bytes, the number of ring
1053 * descriptors must meet the following condition:
1054 * (num_ring_desc * sizeof(union e1000_adv_rx/tx_desc)) % 128 == 0
1056 #define IGB_MIN_RING_DESC 32
1057 #define IGB_MAX_RING_DESC 4096
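/*
 * For example: an advanced RX/TX descriptor is 16 bytes, so any descriptor
 * count that is a multiple of 8 keeps RDLEN/TDLEN a multiple of 128 bytes;
 * both IGB_MIN_RING_DESC (32 * 16 = 512) and IGB_MAX_RING_DESC
 * (4096 * 16 = 65536) satisfy this condition.
 */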
1059 static const struct rte_memzone *
1060 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1061 uint16_t queue_id, uint32_t ring_size, int socket_id)
1063 char z_name[RTE_MEMZONE_NAMESIZE];
1064 const struct rte_memzone *mz;
1066 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1067 dev->driver->pci_drv.name, ring_name,
1068 dev->data->port_id, queue_id);
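/*
 * Look the memzone up first: on queue re-setup the ring memory has already
 * been reserved and can simply be reused.
 */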
1069 mz = rte_memzone_lookup(z_name);
1073 return rte_memzone_reserve_aligned(z_name, (uint64_t)ring_size,
1074 socket_id, 0, IGB_ALIGN);
1078 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1082 if (txq->sw_ring != NULL) {
1083 for (i = 0; i < txq->nb_tx_desc; i++) {
1084 if (txq->sw_ring[i].mbuf != NULL) {
1085 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1086 txq->sw_ring[i].mbuf = NULL;
1093 igb_tx_queue_release(struct igb_tx_queue *txq)
1095 igb_tx_queue_release_mbufs(txq);
1096 rte_free(txq->sw_ring);
1101 igb_dev_tx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1103 uint16_t i, old_nb_queues = dev->data->nb_tx_queues;
1104 struct igb_tx_queue **txq;
1106 if (dev->data->tx_queues == NULL) {
1107 dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues",
1108 sizeof(struct igb_tx_queue *) * nb_queues,
1110 if (dev->data->tx_queues == NULL) {
1111 dev->data->nb_tx_queues = 0;
1115 if (nb_queues < old_nb_queues)
1116 for (i = nb_queues; i < old_nb_queues; i++)
1117 igb_tx_queue_release(dev->data->tx_queues[i]);
1119 if (nb_queues != old_nb_queues) {
1120 txq = rte_realloc(dev->data->tx_queues,
1121 sizeof(struct igb_tx_queue *) * nb_queues,
1126 dev->data->tx_queues = txq;
1127 if (nb_queues > old_nb_queues)
1128 memset(&(txq[old_nb_queues]), 0,
1129 sizeof(struct igb_tx_queue *) *
1130 (nb_queues - old_nb_queues));
1133 dev->data->nb_tx_queues = nb_queues;
1139 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1144 memset((void*)&txq->ctx_cache, 0,
1145 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1149 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1151 struct igb_tx_entry *txe = txq->sw_ring;
1154 struct e1000_hw *hw;
1156 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1157 size = sizeof(union e1000_adv_tx_desc) * txq->nb_tx_desc;
1158 /* Zero out HW ring memory */
1159 for (i = 0; i < size; i++) {
1160 ((volatile char *)txq->tx_ring)[i] = 0;
1163 /* Initialize ring entries */
1164 prev = txq->nb_tx_desc - 1;
1165 for (i = 0; i < txq->nb_tx_desc; i++) {
1166 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1168 txd->wb.status = E1000_TXD_STAT_DD;
1171 txe[prev].next_id = i;
1175 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1176 /* 82575 specific, each tx queue will use 2 hw contexts */
1177 if (hw->mac.type == e1000_82575)
1178 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1180 igb_reset_tx_queue_stat(txq);
1184 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1187 unsigned int socket_id,
1188 const struct rte_eth_txconf *tx_conf)
1190 const struct rte_memzone *tz;
1191 struct igb_tx_queue *txq;
1192 struct e1000_hw *hw;
1195 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1198 * Validate number of transmit descriptors.
1199 * It must not exceed hardware maximum, and must be multiple
1202 if (((nb_desc * sizeof(union e1000_adv_tx_desc)) % IGB_ALIGN) != 0 ||
1203 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1208 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1211 if (tx_conf->tx_free_thresh != 0)
1212 RTE_LOG(WARNING, PMD,
1213 "The tx_free_thresh parameter is not "
1214 "used for the 1G driver.\n");
1215 if (tx_conf->tx_rs_thresh != 0)
1216 RTE_LOG(WARNING, PMD,
1217 "The tx_rs_thresh parameter is not "
1218 "used for the 1G driver.\n");
1219 if (tx_conf->tx_thresh.wthresh == 0)
1220 RTE_LOG(WARNING, PMD,
1221 "To improve 1G driver performance, consider setting "
1222 "the TX WTHRESH value to 4, 8, or 16.\n");
1224 /* Free memory prior to re-allocation if needed */
1225 if (dev->data->tx_queues[queue_idx] != NULL)
1226 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1228 /* First allocate the tx queue data structure */
1229 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1235 * Allocate TX ring hardware descriptors. A memzone large enough to
1236 * handle the maximum ring size is allocated in order to allow for
1237 * resizing in later calls to the queue setup function.
1239 size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
1240 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1243 igb_tx_queue_release(txq);
1247 txq->nb_tx_desc = nb_desc;
1248 txq->pthresh = tx_conf->tx_thresh.pthresh;
1249 txq->hthresh = tx_conf->tx_thresh.hthresh;
1250 txq->wthresh = tx_conf->tx_thresh.wthresh;
1251 txq->queue_id = queue_idx;
1252 txq->port_id = dev->data->port_id;
1254 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx));
1255 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1256 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1258 size = sizeof(union e1000_adv_tx_desc) * nb_desc;
1260 /* Allocate software ring */
1261 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1262 sizeof(struct igb_tx_entry) * nb_desc,
1264 if (txq->sw_ring == NULL) {
1265 igb_tx_queue_release(txq);
1268 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1269 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1271 igb_reset_tx_queue(txq, dev);
1272 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1273 dev->data->tx_queues[queue_idx] = txq;
1279 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1283 if (rxq->sw_ring != NULL) {
1284 for (i = 0; i < rxq->nb_rx_desc; i++) {
1285 if (rxq->sw_ring[i].mbuf != NULL) {
1286 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1287 rxq->sw_ring[i].mbuf = NULL;
1294 igb_rx_queue_release(struct igb_rx_queue *rxq)
1296 igb_rx_queue_release_mbufs(rxq);
1297 rte_free(rxq->sw_ring);
1302 igb_dev_rx_queue_alloc(struct rte_eth_dev *dev, uint16_t nb_queues)
1304 uint16_t i, old_nb_queues = dev->data->nb_rx_queues;
1305 struct igb_rx_queue **rxq;
1307 if (dev->data->rx_queues == NULL) {
1308 dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues",
1309 sizeof(struct igb_rx_queue *) * nb_queues,
1311 if (dev->data->rx_queues == NULL) {
1312 dev->data->nb_rx_queues = 0;
1316 for (i = nb_queues; i < old_nb_queues; i++) {
1317 igb_rx_queue_release(dev->data->rx_queues[i]);
1318 dev->data->rx_queues[i] = NULL;
1320 if (nb_queues != old_nb_queues) {
1321 rxq = rte_realloc(dev->data->rx_queues,
1322 sizeof(struct igb_rx_queue *) * nb_queues,
1327 dev->data->rx_queues = rxq;
1328 if (nb_queues > old_nb_queues)
1329 memset(&(rxq[old_nb_queues]), 0,
1330 sizeof(struct igb_rx_queue *) *
1331 (nb_queues - old_nb_queues));
1334 dev->data->nb_rx_queues = nb_queues;
1340 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1345 /* Zero out HW ring memory */
1346 size = sizeof(union e1000_adv_rx_desc) * rxq->nb_rx_desc;
1347 for (i = 0; i < size; i++) {
1348 ((volatile char *)rxq->rx_ring)[i] = 0;
1352 rxq->pkt_first_seg = NULL;
1353 rxq->pkt_last_seg = NULL;
1357 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1360 unsigned int socket_id,
1361 const struct rte_eth_rxconf *rx_conf,
1362 struct rte_mempool *mp)
1364 const struct rte_memzone *rz;
1365 struct igb_rx_queue *rxq;
1366 struct e1000_hw *hw;
1369 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1372 * Validate number of receive descriptors.
1373 * It must not exceed hardware maximum, and must be multiple
1376 if (((nb_desc * sizeof(union e1000_adv_rx_desc)) % IGB_ALIGN) != 0 ||
1377 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1381 /* Free memory prior to re-allocation if needed */
1382 if (dev->data->rx_queues[queue_idx] != NULL) {
1383 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1384 dev->data->rx_queues[queue_idx] = NULL;
1387 /* First allocate the RX queue data structure. */
1388 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1393 rxq->nb_rx_desc = nb_desc;
1394 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1395 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1396 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1397 rxq->drop_en = rx_conf->rx_drop_en;
1398 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1399 rxq->queue_id = queue_idx;
1400 rxq->port_id = dev->data->port_id;
1401 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1405 * Allocate RX ring hardware descriptors. A memzone large enough to
1406 * handle the maximum ring size is allocated in order to allow for
1407 * resizing in later calls to the queue setup function.
1409 size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
1410 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
1412 igb_rx_queue_release(rxq);
1415 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx));
1416 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
1417 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1419 /* Allocate software ring. */
1420 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1421 sizeof(struct igb_rx_entry) * nb_desc,
1423 if (rxq->sw_ring == NULL) {
1424 igb_rx_queue_release(rxq);
1427 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1428 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1430 dev->data->rx_queues[queue_idx] = rxq;
1431 igb_reset_rx_queue(rxq);
1437 igb_dev_clear_queues(struct rte_eth_dev *dev)
1440 struct igb_tx_queue *txq;
1441 struct igb_rx_queue *rxq;
1443 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1444 txq = dev->data->tx_queues[i];
1445 igb_tx_queue_release_mbufs(txq);
1446 igb_reset_tx_queue(txq, dev);
1449 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1450 rxq = dev->data->rx_queues[i];
1451 igb_rx_queue_release_mbufs(rxq);
1452 igb_reset_rx_queue(rxq);
1457 * Receive Side Scaling (RSS).
1458 * See section 7.1.1.7 in the following document:
1459 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1462 * The source and destination IP addresses of the IP header and the source and
1463 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1464 * against a configurable random key to compute a 32-bit RSS hash result.
1465 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1466 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1467 * RSS output index which is used as the RX queue index where to store the
1469 * The following output is supplied in the RX write-back descriptor:
1470 * - 32-bit result of the Microsoft RSS hash function,
1471 * - 4-bit RSS type field.
1475 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1476 * Used as the default key.
1478 static uint8_t rss_intel_key[40] = {
1479 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1480 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1481 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1482 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1483 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1487 igb_rss_disable(struct rte_eth_dev *dev)
1489 struct e1000_hw *hw;
1492 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1493 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1494 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1495 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1499 igb_rss_configure(struct rte_eth_dev *dev)
1501 struct e1000_hw *hw;
1509 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1511 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1512 if (rss_hf == 0) /* Disable RSS. */ {
1513 igb_rss_disable(dev);
1516 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1517 if (hash_key == NULL)
1518 hash_key = rss_intel_key; /* Default hash key. */
1520 /* Fill in RSS hash key. */
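/*
 * Each RSSRK(i) register holds four consecutive key bytes packed in
 * little-endian order; the 40-byte key therefore fills ten 32-bit registers.
 */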
1521 for (i = 0; i < 10; i++) {
1522 rss_key = hash_key[(i * 4)];
1523 rss_key |= hash_key[(i * 4) + 1] << 8;
1524 rss_key |= hash_key[(i * 4) + 2] << 16;
1525 rss_key |= hash_key[(i * 4) + 3] << 24;
1526 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1529 /* Fill in redirection table. */
1530 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
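/*
 * RX queues are assigned to the 128 RETA entries in a round-robin fashion;
 * four 8-bit entries are packed into each 32-bit RETA register.
 */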
1531 for (i = 0; i < 128; i++) {
1538 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
1539 i % dev->data->nb_rx_queues : 0);
1540 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
1542 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
1545 /* Set configured hashing functions in MRQC register. */
1546 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1547 if (rss_hf & ETH_RSS_IPV4)
1548 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1549 if (rss_hf & ETH_RSS_IPV4_TCP)
1550 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1551 if (rss_hf & ETH_RSS_IPV6)
1552 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1553 if (rss_hf & ETH_RSS_IPV6_EX)
1554 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1555 if (rss_hf & ETH_RSS_IPV6_TCP)
1556 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1557 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1558 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1559 if (rss_hf & ETH_RSS_IPV4_UDP)
1560 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1561 if (rss_hf & ETH_RSS_IPV6_UDP)
1562 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1563 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1564 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1565 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1568 /*********************************************************************
1570 * Enable receive unit.
1572 **********************************************************************/
1575 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
1577 struct igb_rx_entry *rxe = rxq->sw_ring;
1581 /* Initialize software ring entries. */
1582 for (i = 0; i < rxq->nb_rx_desc; i++) {
1583 volatile union e1000_adv_rx_desc *rxd;
1584 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
1587 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
1588 "queue_id=%hu\n", rxq->queue_id);
1589 igb_rx_queue_release(rxq);
1593 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
1594 rxd = &rxq->rx_ring[i];
1595 rxd->read.hdr_addr = dma_addr;
1596 rxd->read.pkt_addr = dma_addr;
1604 eth_igb_rx_init(struct rte_eth_dev *dev)
1606 struct e1000_hw *hw;
1607 struct igb_rx_queue *rxq;
1608 struct rte_pktmbuf_pool_private *mbp_priv;
1613 uint16_t rctl_bsize;
1617 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1621 * Make sure receives are disabled while setting
1622 * up the descriptor ring.
1624 rctl = E1000_READ_REG(hw, E1000_RCTL);
1625 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
1628 * Configure support of jumbo frames, if any.
1630 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
1631 rctl |= E1000_RCTL_LPE;
1633 /* Set maximum packet length. */
1634 E1000_WRITE_REG(hw, E1000_RLPML,
1635 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1637 rctl &= ~E1000_RCTL_LPE;
1639 /* Configure and enable each RX queue. */
1641 dev->rx_pkt_burst = eth_igb_recv_pkts;
1642 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1646 rxq = dev->data->rx_queues[i];
1648 /* Allocate buffers for descriptor rings and set up queue */
1649 ret = igb_alloc_rx_queue_mbufs(rxq);
1651 igb_dev_clear_queues(dev);
1656 * Reset crc_len in case it was changed after queue setup by a
1660 (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
1663 bus_addr = rxq->rx_ring_phys_addr;
1664 E1000_WRITE_REG(hw, E1000_RDLEN(i),
1666 sizeof(union e1000_adv_rx_desc));
1667 E1000_WRITE_REG(hw, E1000_RDBAH(i),
1668 (uint32_t)(bus_addr >> 32));
1669 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
1671 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1674 * Configure RX buffer size.
1676 mbp_priv = (struct rte_pktmbuf_pool_private *)
1677 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1678 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1679 RTE_PKTMBUF_HEADROOM);
1680 if (buf_size >= 1024) {
1682 * Configure the BSIZEPACKET field of the SRRCTL
1683 * register of the queue.
1684 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1685 * If this field is equal to 0b, then RCTL.BSIZE
1686 * determines the RX packet buffer size.
1688 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1689 E1000_SRRCTL_BSIZEPKT_MASK);
1690 buf_size = (uint16_t) ((srrctl &
1691 E1000_SRRCTL_BSIZEPKT_MASK) <<
1692 E1000_SRRCTL_BSIZEPKT_SHIFT);
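/*
 * buf_size is now rounded down to the 1 KB granularity actually programmed
 * into the hardware. For example, a 2048-byte mbuf data room minus the
 * default 128-byte headroom gives 1920 bytes, which is advertised to the
 * NIC as a 1024-byte buffer; if the configured maximum RX packet length
 * exceeds this, the scattered receive function is selected just below.
 */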
1694 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
1695 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1696 dev->data->scattered_rx = 1;
1700 * Use BSIZE field of the device RCTL register.
1702 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1703 rctl_bsize = buf_size;
1704 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1705 dev->data->scattered_rx = 1;
1708 /* If drop_en is set, drop packets when no RX descriptors are available. */
1710 srrctl |= E1000_SRRCTL_DROP_EN;
1712 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
1714 /* Enable this RX queue. */
1715 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
1716 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
1717 rxdctl &= 0xFFF00000;
1718 rxdctl |= (rxq->pthresh & 0x1F);
1719 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
1720 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
1721 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
1725 * Setup BSIZE field of RCTL register, if needed.
1726 * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
1727 * register, since the code above configures the SRRCTL register of
1728 * the RX queue in such a case.
1729 * All configurable sizes are:
1730 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
1731 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
1732 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
1733 * 2048: rctl |= E1000_RCTL_SZ_2048;
1734 * 1024: rctl |= E1000_RCTL_SZ_1024;
1735 * 512: rctl |= E1000_RCTL_SZ_512;
1736 * 256: rctl |= E1000_RCTL_SZ_256;
1738 if (rctl_bsize > 0) {
1739 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1740 rctl |= E1000_RCTL_SZ_512;
1741 else /* 256 <= buf_size < 512 - use 256 */
1742 rctl |= E1000_RCTL_SZ_256;
1746 * Configure RSS if device configured with multiple RX queues.
1748 if (dev->data->nb_rx_queues > 1)
1749 igb_rss_configure(dev);
1751 igb_rss_disable(dev);
1754 * Setup the Checksum Register.
1755 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1757 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
1758 rxcsum |= E1000_RXCSUM_PCSD;
1760 /* Enable both L3/L4 rx checksum offload */
1761 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
1762 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1764 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1765 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
1767 /* Setup the Receive Control Register. */
1768 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1769 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
1771 /* set STRCRC bit in all queues for Powerville */
1772 if (hw->mac.type == e1000_i350) {
1773 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1774 uint32_t dvmolr = E1000_READ_REG(hw,
1776 dvmolr |= E1000_DVMOLR_STRCRC;
1777 E1000_WRITE_REG(hw, E1000_DVMOLR(i), dvmolr);
1781 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1783 /* clear STRCRC bit in all queues for Powerville */
1784 if (hw->mac.type == e1000_i350) {
1785 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1786 uint32_t dvmolr = E1000_READ_REG(hw,
1788 dvmolr &= ~E1000_DVMOLR_STRCRC;
1789 E1000_WRITE_REG(hw, E1000_DVMOLR(i), dvmolr);
1794 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
1795 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
1796 E1000_RCTL_RDMTS_HALF |
1797 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
1799 /* Make sure VLAN Filters are off. */
1800 rctl &= ~E1000_RCTL_VFE;
1801 /* Don't store bad packets. */
1802 rctl &= ~E1000_RCTL_SBP;
1804 /* Enable Receives. */
1805 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
1808 * Setup the HW Rx Head and Tail Descriptor Pointers.
1809 * This needs to be done after enable.
1811 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1812 rxq = dev->data->rx_queues[i];
1813 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
1814 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
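/*
 * RDT is programmed to the last descriptor index rather than wrapping
 * around to RDH, so the two registers never hold the same value (see the
 * RDT/RDH note in the receive functions).
 */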
1820 /*********************************************************************
1822 * Enable transmit unit.
1824 **********************************************************************/
1826 eth_igb_tx_init(struct rte_eth_dev *dev)
1828 struct e1000_hw *hw;
1829 struct igb_tx_queue *txq;
1834 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1836 /* Setup the Base and Length of the Tx Descriptor Rings. */
1837 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1839 txq = dev->data->tx_queues[i];
1840 bus_addr = txq->tx_ring_phys_addr;
1842 E1000_WRITE_REG(hw, E1000_TDLEN(i),
1844 sizeof(union e1000_adv_tx_desc));
1845 E1000_WRITE_REG(hw, E1000_TDBAH(i),
1846 (uint32_t)(bus_addr >> 32));
1847 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
1849 /* Setup the HW Tx Head and Tail descriptor pointers. */
1850 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
1851 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
1853 /* Setup Transmit threshold registers. */
1854 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
1855 txdctl |= txq->pthresh & 0x1F;
1856 txdctl |= ((txq->hthresh & 0x1F) << 8);
1857 txdctl |= ((txq->wthresh & 0x1F) << 16);
1858 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
1859 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
1862 /* Program the Transmit Control Register. */
1863 tctl = E1000_READ_REG(hw, E1000_TCTL);
1864 tctl &= ~E1000_TCTL_CT;
1865 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
1866 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
1868 e1000_config_collision_dist(hw);
1870 /* This write will effectively turn on the transmit unit. */
1871 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
1874 /*********************************************************************
1876 * Enable VF receive unit.
1878 **********************************************************************/
1880 eth_igbvf_rx_init(struct rte_eth_dev *dev)
1882 struct e1000_hw *hw;
1883 struct igb_rx_queue *rxq;
1884 struct rte_pktmbuf_pool_private *mbp_priv;
1887 uint16_t rctl_bsize;
1891 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1893 /* Configure and enable each RX queue. */
1895 dev->rx_pkt_burst = eth_igb_recv_pkts;
1896 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1900 rxq = dev->data->rx_queues[i];
1902 /* Allocate buffers for descriptor rings and set up queue */
1903 ret = igb_alloc_rx_queue_mbufs(rxq);
1907 bus_addr = rxq->rx_ring_phys_addr;
1908 E1000_WRITE_REG(hw, E1000_RDLEN(i),
1910 sizeof(union e1000_adv_rx_desc));
1911 E1000_WRITE_REG(hw, E1000_RDBAH(i),
1912 (uint32_t)(bus_addr >> 32));
1913 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
1915 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1918 * Configure RX buffer size.
1920 mbp_priv = (struct rte_pktmbuf_pool_private *)
1921 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1922 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1923 RTE_PKTMBUF_HEADROOM);
1924 if (buf_size >= 1024) {
1926 * Configure the BSIZEPACKET field of the SRRCTL
1927 * register of the queue.
1928 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1929 * If this field is equal to 0b, then RCTL.BSIZE
1930 * determines the RX packet buffer size.
1932 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1933 E1000_SRRCTL_BSIZEPKT_MASK);
1934 buf_size = (uint16_t) ((srrctl &
1935 E1000_SRRCTL_BSIZEPKT_MASK) <<
1936 E1000_SRRCTL_BSIZEPKT_SHIFT);
1938 if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size){
1939 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1940 dev->data->scattered_rx = 1;
1944 * Use BSIZE field of the device RCTL register.
1946 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1947 rctl_bsize = buf_size;
1948 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1949 dev->data->scattered_rx = 1;
1952 /* If drop_en is set, drop packets when no RX descriptors are available. */
1954 srrctl |= E1000_SRRCTL_DROP_EN;
1956 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
1958 /* Enable this RX queue. */
1959 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
1960 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
1961 rxdctl &= 0xFFF00000;
1962 rxdctl |= (rxq->pthresh & 0x1F);
1963 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
1964 if (hw->mac.type == e1000_82576) {
1966 * Workaround for an 82576 VF erratum:
1967 * force WTHRESH to 1 to avoid the descriptor write-back
1968 * not being triggered in some cases.
1971 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !\n");
1974 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
1975 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
1979 * Setup the HW Rx Head and Tail Descriptor Pointers.
1980 * This needs to be done after enable.
1982 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1983 rxq = dev->data->rx_queues[i];
1984 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
1985 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
1991 /*********************************************************************
1993 * Enable VF transmit unit.
1995 **********************************************************************/
1997 eth_igbvf_tx_init(struct rte_eth_dev *dev)
1999 struct e1000_hw *hw;
2000 struct igb_tx_queue *txq;
2004 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2006 /* Setup the Base and Length of the Tx Descriptor Rings. */
2007 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2010 txq = dev->data->tx_queues[i];
2011 bus_addr = txq->tx_ring_phys_addr;
2012 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2014 sizeof(union e1000_adv_tx_desc));
2015 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2016 (uint32_t)(bus_addr >> 32));
2017 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2019 /* Setup the HW Tx Head and Tail descriptor pointers. */
2020 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2021 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2023 /* Setup Transmit threshold registers. */
2024 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2025 txdctl |= txq->pthresh & 0x1F;
2026 txdctl |= ((txq->hthresh & 0x1F) << 8);
2027 if (hw->mac.type == e1000_82576) {
2029 * Workaround for an 82576 VF erratum:
2030 * force WTHRESH to 1 to avoid the descriptor write-back
2031 * not being triggered in some cases.
2034 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !\n");
2037 txdctl |= ((txq->wthresh & 0x1F) << 16);
2038 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2039 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);