/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <inttypes.h>

#include <rte_interrupts.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>

#include "e1000_logs.h"
#include "e1000/e1000_api.h"
#include "e1000_ethdev.h"
static inline struct rte_mbuf *
rte_rxmbuf_alloc(struct rte_mempool *mp)
{
	struct rte_mbuf *m;

	m = __rte_mbuf_raw_alloc(mp);
	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
	return (m);
}
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
	(uint64_t) ((mb)->buf_physaddr + \
	(uint64_t) ((char *)((mb)->pkt.data) - \
	(char *)(mb)->buf_addr))

#define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
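
/*
 * Illustrative sketch (not part of the driver): for a freshly allocated
 * mbuf, whose pkt.data still points RTE_PKTMBUF_HEADROOM bytes into the
 * buffer, the two macros above yield the same bus address:
 *
 *	struct rte_mbuf *m = rte_rxmbuf_alloc(mp);
 *	uint64_t a = RTE_MBUF_DATA_DMA_ADDR(m);
 *	uint64_t b = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(m);
 *	// a == b until pkt.data is moved, e.g. by rte_pktmbuf_adj().
 */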
/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
	uint16_t next_id; /**< Index of next descriptor in ring. */
	uint16_t last_id; /**< Index of last scattered descriptor. */
};
/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
	volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
	struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
	uint16_t            rx_tail;    /**< current value of RDT register. */
	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
	uint16_t            queue_id;   /**< RX queue index. */
	uint16_t            reg_idx;    /**< RX queue register index. */
	uint8_t             port_id;    /**< Device port identifier. */
	uint8_t             pthresh;    /**< Prefetch threshold register. */
	uint8_t             hthresh;    /**< Host threshold register. */
	uint8_t             wthresh;    /**< Write-back threshold register. */
	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
	uint8_t             drop_en;    /**< If not 0, set SRRCTL.Drop_En. */
};
/**
 * Hardware context number.
 */
enum igb_advctx_num {
	IGB_CTX_0    = 0, /**< CTX0    */
	IGB_CTX_1    = 1, /**< CTX1    */
	IGB_CTX_NUM  = 2, /**< CTX_NUM */
};

/**
 * Structure to check if a new context descriptor needs to be built.
 */
struct igb_advctx_info {
	uint16_t flags;    /**< ol_flags related to context build. */
	uint32_t cmp_mask; /**< compare mask for vlan_macip_lens. */
	union rte_vlan_macip vlan_macip_lens; /**< vlan, mac & ip length. */
};
/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
	volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address. */
	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
	struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
	uint32_t               txd_type;   /**< Device-specific TXD type. */
	uint16_t               nb_tx_desc; /**< number of TX descriptors. */
	uint16_t               tx_tail;    /**< Current value of TDT register. */
	uint16_t               tx_head;    /**< Index of first used TX descriptor. */
	uint16_t               queue_id;   /**< TX queue index. */
	uint16_t               reg_idx;    /**< TX queue register index. */
	uint8_t                port_id;    /**< Device port identifier. */
	uint8_t                pthresh;    /**< Prefetch threshold register. */
	uint8_t                hthresh;    /**< Host threshold register. */
	uint8_t                wthresh;    /**< Write-back threshold register. */
	uint32_t               ctx_curr;   /**< Current used hardware descriptor. */
	uint32_t               ctx_start;  /**< Start context position for transmit queue. */
	struct igb_advctx_info ctx_cache[IGB_CTX_NUM]; /**< Hardware context history. */
};
#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
#define rte_igb_prefetch(p)	rte_prefetch0(p)
#else
#define rte_igb_prefetch(p)	do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)	rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)	do {} while (0)
#endif
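
/*
 * Note: rte_igb_prefetch() is used in the burst functions below to pull
 * the next software ring entry and descriptor into cache while the
 * current one is processed; building with RTE_PMD_USE_PREFETCH undefined
 * turns it into a no-op.
 */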
/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * Advanced context descriptors are almost identical between igb and ixgbe.
 * This is kept as a separate function to look for optimization
 * opportunities here; rework is required to go with pre-defined values.
 */
static inline void
igbe_set_xmit_ctx(struct igb_tx_queue* txq,
		volatile struct e1000_adv_tx_context_desc *ctx_txd,
		uint16_t ol_flags, uint32_t vlan_macip_lens)
{
	uint32_t type_tucmd_mlhl;
	uint32_t mss_l4len_idx;
	uint32_t ctx_idx, ctx_curr;
	uint32_t cmp_mask;

	ctx_curr = txq->ctx_curr;
	ctx_idx = ctx_curr + txq->ctx_start;

	cmp_mask = 0;
	type_tucmd_mlhl = 0;

	if (ol_flags & PKT_TX_VLAN_PKT) {
		cmp_mask |= TX_VLAN_CMP_MASK;
	}

	if (ol_flags & PKT_TX_IP_CKSUM) {
		type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
		cmp_mask |= TX_MAC_LEN_CMP_MASK;
	}

	/* Specify which HW CTX to upload. */
	mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
	switch (ol_flags & PKT_TX_L4_MASK) {
	case PKT_TX_UDP_CKSUM:
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_TCP_CKSUM:
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	case PKT_TX_SCTP_CKSUM:
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
		mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
		cmp_mask |= TX_MACIP_LEN_CMP_MASK;
		break;
	default:
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
		break;
	}

	txq->ctx_cache[ctx_curr].flags = ol_flags;
	txq->ctx_cache[ctx_curr].cmp_mask = cmp_mask;
	txq->ctx_cache[ctx_curr].vlan_macip_lens.data =
		vlan_macip_lens & cmp_mask;

	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
	ctx_txd->seqnum_seed = 0;
}
/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
		uint32_t vlan_macip_lens)
{
	/* If match with the current context */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* If match with the second context */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
		(txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
			return txq->ctx_curr;
	}

	/* Mismatch: a new context descriptor is required. */
	return (IGB_CTX_NUM);
}
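
/*
 * Illustrative use (mirrors eth_igb_xmit_pkts() below): a return value of
 * IGB_CTX_NUM means neither cached context matched, so a new context
 * descriptor must be written with igbe_set_xmit_ctx():
 *
 *	ctx = what_advctx_update(txq, tx_ol_req, vlan_macip_lens);
 *	new_ctx = (ctx == IGB_CTX_NUM);
 */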
static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
{
	static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
	static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
	uint32_t tmp;

	tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
	return tmp;
}
static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
{
	static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
	return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
}
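
/*
 * The table-lookup idiom in the two helpers above is branchless:
 * (ol_flags & PKT_TX_IP_CKSUM) != 0 evaluates to 0 or 1 and indexes the
 * static array directly, avoiding a conditional jump per packet.
 */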
uint16_t
eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct igb_tx_queue *txq;
	struct igb_tx_entry *sw_ring;
	struct igb_tx_entry *txe, *txn;
	volatile union e1000_adv_tx_desc *txr;
	volatile union e1000_adv_tx_desc *txd;
	struct rte_mbuf *tx_pkt;
	struct rte_mbuf *m_seg;
	uint64_t buf_dma_addr;
	uint32_t olinfo_status;
	uint32_t cmd_type_len;
	uint32_t pkt_len;
	uint16_t slen;
	uint16_t ol_flags;
	uint16_t tx_end;
	uint16_t tx_id;
	uint16_t tx_last;
	uint16_t nb_tx;
	uint16_t tx_ol_req;
	uint32_t new_ctx = 0;
	uint32_t ctx = 0;
	uint32_t vlan_macip_lens;

	txq = tx_queue;
	sw_ring = txq->sw_ring;
	txr = txq->tx_ring;
	tx_id = txq->tx_tail;
	txe = &sw_ring[tx_id];

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		pkt_len = tx_pkt->pkt.pkt_len;

		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

		/*
		 * The number of descriptors that must be allocated for a
		 * packet is the number of segments of that packet, plus 1
		 * Context Descriptor for the VLAN Tag Identifier, if any.
		 * Determine the last TX descriptor to allocate in the TX ring
		 * for the packet, starting from the current position (tx_id)
		 * in the ring.
		 */
		tx_last = (uint16_t) (tx_id + tx_pkt->pkt.nb_segs - 1);

		ol_flags = tx_pkt->ol_flags;
		vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
		tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);

		/* Check if a context descriptor needs to be built. */
		if (tx_ol_req) {
			ctx = what_advctx_update(txq, tx_ol_req,
				vlan_macip_lens);
			/* Only allocate context descriptor if required */
			new_ctx = (ctx == IGB_CTX_NUM);
			ctx = txq->ctx_curr;
			tx_last = (uint16_t) (tx_last + new_ctx);
		}
		if (tx_last >= txq->nb_tx_desc)
			tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
			   " tx_first=%u tx_last=%u\n",
			   (unsigned) txq->port_id,
			   (unsigned) txq->queue_id,
			   (unsigned) pkt_len,
			   (unsigned) tx_id,
			   (unsigned) tx_last);

		/*
		 * Check if there are enough free descriptors in the TX ring
		 * to transmit the next packet.
		 * This operation is based on the two following rules:
		 *
		 *   1- Only check that the last needed TX descriptor can be
		 *      allocated (by construction, if that descriptor is free,
		 *      all intermediate ones are also free).
		 *
		 *      For this purpose, the index of the last TX descriptor
		 *      used for a packet (the "last descriptor" of a packet)
		 *      is recorded in the TX entries (the last one included)
		 *      that are associated with all TX descriptors allocated
		 *      for that packet.
		 *
		 *   2- Avoid allocating the last free TX descriptor of the
		 *      ring, in order to never set the TDT register with the
		 *      same value stored in parallel by the NIC in the TDH
		 *      register, which makes the TX engine of the NIC enter
		 *      a deadlock situation.
		 *
		 *      By extension, avoid allocating a free descriptor that
		 *      belongs to the last set of free descriptors allocated
		 *      to the same packet previously transmitted.
		 */

		/*
		 * The "last descriptor" of the previously sent packet, if any,
		 * which used the last descriptor to allocate.
		 */
		tx_end = sw_ring[tx_last].last_id;

		/*
		 * The next descriptor following that "last descriptor" in the
		 * ring.
		 */
		tx_end = sw_ring[tx_end].next_id;

		/*
		 * The "last descriptor" associated with that next descriptor.
		 */
		tx_end = sw_ring[tx_end].last_id;

		/*
		 * Check that this descriptor is free.
		 */
		if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
			if (nb_tx == 0)
				return (0);
			goto end_of_tx;
		}

		/*
		 * Set common flags of all TX Data Descriptors.
		 *
		 * The following bits must be set in all Data Descriptors:
		 *   - E1000_ADVTXD_DTYP_DATA
		 *   - E1000_ADVTXD_DCMD_DEXT
		 *
		 * The following bits must be set in the first Data Descriptor
		 * and are ignored in the other ones:
		 *   - E1000_ADVTXD_DCMD_IFCS
		 *   - E1000_ADVTXD_MAC_1588
		 *   - E1000_ADVTXD_DCMD_VLE
		 *
		 * The following bits must only be set in the last Data
		 * Descriptor:
		 *   - E1000_TXD_CMD_EOP
		 *
		 * The following bits can be set in any Data Descriptor, but
		 * are only set in the last Data Descriptor:
		 *   - E1000_TXD_CMD_RS
		 */
		cmd_type_len = txq->txd_type |
			E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
		olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
#if defined(RTE_LIBRTE_IEEE1588)
		if (ol_flags & PKT_TX_IEEE1588_TMST)
			cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
		if (tx_ol_req) {
			/* Setup TX Advanced context descriptor if required */
			if (new_ctx) {
				volatile struct e1000_adv_tx_context_desc *
					ctx_txd;

				ctx_txd = (volatile struct
					e1000_adv_tx_context_desc *)
					&txr[tx_id];

				txn = &sw_ring[txe->next_id];
				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);

				if (txe->mbuf != NULL) {
					rte_pktmbuf_free_seg(txe->mbuf);
					txe->mbuf = NULL;
				}

				igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
					vlan_macip_lens);

				txe->last_id = tx_last;
				tx_id = txe->next_id;
				txe = txn;
			}

			/* Setup the TX Advanced Data Descriptor */
			cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
			olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
			olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
		}

		m_seg = tx_pkt;
		do {
			txn = &sw_ring[txe->next_id];
			txd = &txr[tx_id];

			if (txe->mbuf != NULL)
				rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = m_seg;

			/*
			 * Set up transmit descriptor.
			 */
			slen = (uint16_t) m_seg->pkt.data_len;
			buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
			txd->read.buffer_addr =
				rte_cpu_to_le_64(buf_dma_addr);
			txd->read.cmd_type_len =
				rte_cpu_to_le_32(cmd_type_len | slen);
			txd->read.olinfo_status =
				rte_cpu_to_le_32(olinfo_status);
			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
			m_seg = m_seg->pkt.next;
		} while (m_seg != NULL);

		/*
		 * The last packet data descriptor needs End Of Packet (EOP)
		 * and Report Status (RS).
		 */
		txd->read.cmd_type_len |=
			rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	}
end_of_tx:
	rte_wmb();

	/*
	 * Set the Transmit Descriptor Tail (TDT).
	 */
	E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
		   (unsigned) txq->port_id, (unsigned) txq->queue_id,
		   (unsigned) tx_id, (unsigned) nb_tx);
	txq->tx_tail = tx_id;

	return (nb_tx);
}
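
/*
 * Illustrative application-side usage (not part of this file): the ethdev
 * layer dispatches rte_eth_tx_burst() to eth_igb_xmit_pkts() for igb ports:
 *
 *	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *	// sent < nb_pkts when the TX ring ran out of free descriptors.
 */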
/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
static inline uint16_t
rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
{
	uint16_t pkt_flags;

	static uint16_t ip_pkt_types_map[16] = {
		0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
		PKT_RX_IPV6_HDR, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
		PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
	};

#if defined(RTE_LIBRTE_IEEE1588)
	static uint32_t ip_pkt_etqf_map[8] = {
		0, 0, 0, PKT_RX_IEEE1588_PTP,
		0, 0, 0, 0,
	};

	pkt_flags = (uint16_t)((hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ?
				ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#else
	pkt_flags = (uint16_t)((hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ? 0 :
				ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
#endif
	return (uint16_t)(pkt_flags | (((hl_tp_rs & 0x0F) == 0) ?
					0 : PKT_RX_RSS_HASH));
}
static inline uint16_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
	uint16_t pkt_flags;

	/* Check if VLAN present */
	pkt_flags = (uint16_t)((rx_status & E1000_RXD_STAT_VP) ?
				PKT_RX_VLAN_PKT : 0);

#if defined(RTE_LIBRTE_IEEE1588)
	if (rx_status & E1000_RXD_STAT_TMST)
		pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
#endif
	return pkt_flags;
}
static inline uint16_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
	/*
	 * Bit 30: IPE, IPv4 checksum error
	 * Bit 29: L4I, L4 integrity error
	 */
	static uint16_t error_to_pkt_flags_map[4] = {
		0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
	};
	return error_to_pkt_flags_map[(rx_status >>
		E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
}
uint16_t
eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union e1000_adv_rx_desc *rx_ring;
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union e1000_adv_rx_desc rxd;
	uint64_t dma_addr;
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t pkt_len;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;
	while (nb_rx < nb_pkts) {
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * End of packet.
		 *
		 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
		 * likely to be invalid and to be dropped by the various
		 * validation checks performed by the network stack.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x pkt_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_igb_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_igb_prefetch(&rx_ring[rx_id]);
			rte_igb_prefetch(&sw_ring[rx_id]);
		}

		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma_addr =
			rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.hdr_addr = dma_addr;
		rxdp->read.pkt_addr = dma_addr;

		/*
		 * Initialize the returned mbuf.
		 * 1) setup generic mbuf fields:
		 *    - number of segments,
		 *    - next segment,
		 *    - packet length,
		 *    - RX port identifier.
		 * 2) integrate hardware offload data, if any:
		 *    - RSS flag & hash,
		 *    - IP checksum flag,
		 *    - VLAN TCI, if any,
		 *    - error flags.
		 */
		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
				      rxq->crc_len);
		rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
		rte_packet_prefetch(rxm->pkt.data);
		rxm->pkt.nb_segs = 1;
		rxm->pkt.next = NULL;
		rxm->pkt.pkt_len = pkt_len;
		rxm->pkt.data_len = pkt_len;
		rxm->pkt.in_port = rxq->port_id;

		rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		/* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
		rxm->pkt.vlan_macip.f.vlan_tci =
			rte_le_to_cpu_16(rxd.wb.upper.vlan);

		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		rxm->ol_flags = pkt_flags;

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = rxm;
	}
	rxq->rx_tail = rx_id;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
			(rxq->nb_rx_desc - 1) : (rx_id - 1));
		E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return (nb_rx);
}
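
/*
 * Illustrative receive loop (application side, assumes a configured port;
 * process_pkt() is hypothetical):
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t i, n = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 *	for (i = 0; i < n; i++)
 *		process_pkt(bufs[i]);
 */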
uint16_t
eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union e1000_adv_rx_desc *rx_ring;
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *first_seg;
	struct rte_mbuf *last_seg;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union e1000_adv_rx_desc rxd;
	uint64_t dma; /* Physical address of mbuf data buffer */
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t data_len;
	uint16_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;

	/*
	 * Retrieve RX context of current packet, if any.
	 */
	first_seg = rxq->pkt_first_seg;
	last_seg = rxq->pkt_last_seg;

	while (nb_rx < nb_pkts) {
	next_desc:
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * Descriptor done.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x data_len=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_rxmbuf_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u\n", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_igb_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_igb_prefetch(&rx_ring[rx_id]);
			rte_igb_prefetch(&sw_ring[rx_id]);
		}

		/*
		 * Update RX descriptor with the physical address of the new
		 * data buffer of the new allocated mbuf.
		 */
		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
		rxdp->read.pkt_addr = dma;
		rxdp->read.hdr_addr = dma;

		/*
		 * Set data length & data buffer address of mbuf.
		 */
		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
		rxm->pkt.data_len = data_len;
		rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (first_seg == NULL) {
			first_seg = rxm;
			first_seg->pkt.pkt_len = data_len;
			first_seg->pkt.nb_segs = 1;
		} else {
			first_seg->pkt.pkt_len += data_len;
			first_seg->pkt.nb_segs++;
			last_seg->pkt.next = rxm;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (! (staterr & E1000_RXD_STAT_EOP)) {
			last_seg = rxm;
			goto next_desc;
		}

		/*
		 * This is the last buffer of the received packet.
		 * If the CRC is not stripped by the hardware:
		 *    - Subtract the CRC length from the total packet length.
		 *    - If the last buffer only contains the whole CRC or a part
		 *      of it, free the mbuf associated to the last buffer.
		 *      If part of the CRC is also contained in the previous
		 *      mbuf, subtract the length of that CRC part from the
		 *      data length of the previous mbuf.
		 */
		rxm->pkt.next = NULL;
		if (unlikely(rxq->crc_len > 0)) {
			first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
			if (data_len <= ETHER_CRC_LEN) {
				rte_pktmbuf_free_seg(rxm);
				first_seg->pkt.nb_segs--;
				last_seg->pkt.data_len = (uint16_t)
					(last_seg->pkt.data_len -
					 (ETHER_CRC_LEN - data_len));
				last_seg->pkt.next = NULL;
			} else
				rxm->pkt.data_len =
					(uint16_t) (data_len - ETHER_CRC_LEN);
		}

		/*
		 * Initialize the first mbuf of the returned packet:
		 *    - RX port identifier,
		 *    - hardware offload data, if any:
		 *      - RSS flag & hash,
		 *      - IP checksum flag,
		 *      - VLAN TCI, if any,
		 *      - error flags.
		 */
		first_seg->pkt.in_port = rxq->port_id;
		first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;

		/*
		 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
		 * set in the pkt_flags field.
		 */
		first_seg->pkt.vlan_macip.f.vlan_tci =
			rte_le_to_cpu_16(rxd.wb.upper.vlan);
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_status_to_pkt_flags(staterr));
		pkt_flags = (uint16_t)(pkt_flags |
				rx_desc_error_to_pkt_flags(staterr));
		first_seg->ol_flags = pkt_flags;

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch(first_seg->pkt.data);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	/*
	 * Record index of the next RX descriptor to probe.
	 */
	rxq->rx_tail = rx_id;

	/*
	 * Save receive context.
	 */
	rxq->pkt_first_seg = first_seg;
	rxq->pkt_last_seg = last_seg;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u\n",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
			(rxq->nb_rx_desc - 1) : (rx_id - 1));
		E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return (nb_rx);
}
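
/*
 * Worked example of the CRC trimming above: a 1500-byte frame plus 4-byte
 * CRC received into 746-byte buffers spans 3 descriptors (746 + 746 + 12).
 * The 12-byte tail holds 8 data bytes and the whole CRC, so 4 bytes are
 * simply trimmed from its data_len. Had the split been 750 + 750 + 4, the
 * last segment would hold only CRC: it is freed, nb_segs is decremented,
 * and nothing is trimmed from the previous segment.
 */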
/**
 * Rings setup and release.
 *
 * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
 * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary.
 * This will also optimize cache line size effect.
 * H/W supports up to cache line size 128.
 */
#define IGB_ALIGN 128

/**
 * Maximum number of Ring Descriptors.
 *
 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
 * descriptors should meet the following condition:
 *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
 */
#define IGB_MIN_RING_DESC 32
#define IGB_MAX_RING_DESC 4096
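
/*
 * Example: union e1000_adv_rx_desc and union e1000_adv_tx_desc are both
 * 16 bytes, so any descriptor count that is a multiple of 8 satisfies the
 * 128-byte constraint; the queue setup functions below reject counts that
 * do not.
 */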
static const struct rte_memzone *
ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
		      uint16_t queue_id, uint32_t ring_size, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
			dev->driver->pci_drv.name, ring_name,
			dev->data->port_id, queue_id);
	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	return rte_memzone_reserve_aligned(z_name, ring_size,
			socket_id, 0, IGB_ALIGN);
}
static void
igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
	unsigned i;

	if (txq->sw_ring != NULL) {
		for (i = 0; i < txq->nb_tx_desc; i++) {
			if (txq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
				txq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
igb_tx_queue_release(struct igb_tx_queue *txq)
{
	if (txq != NULL) {
		igb_tx_queue_release_mbufs(txq);
		rte_free(txq->sw_ring);
		rte_free(txq);
	}
}

void
eth_igb_tx_queue_release(void *txq)
{
	igb_tx_queue_release(txq);
}

static void
igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
{
	txq->tx_head = 0;
	txq->tx_tail = 0;
	txq->ctx_curr = 0;
	memset((void*)&txq->ctx_cache, 0,
		IGB_CTX_NUM * sizeof(struct igb_advctx_info));
}
static void
igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
{
	struct igb_tx_entry *txe = txq->sw_ring;
	uint32_t i, size;
	uint16_t prev;
	struct e1000_hw *hw;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	size = sizeof(union e1000_adv_tx_desc) * txq->nb_tx_desc;
	/* Zero out HW ring memory */
	for (i = 0; i < size; i++) {
		((volatile char *)txq->tx_ring)[i] = 0;
	}

	/* Initialize ring entries */
	prev = (uint16_t)(txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);

		txd->wb.status = E1000_TXD_STAT_DD;
		txe[i].mbuf = NULL;
		txe[i].last_id = (uint16_t)i;
		txe[prev].next_id = (uint16_t)i;
		prev = (uint16_t)i;
	}

	txq->txd_type = E1000_ADVTXD_DTYP_DATA;
	/* 82575 specific, each tx queue will use 2 hw contexts */
	if (hw->mac.type == e1000_82575)
		txq->ctx_start = txq->queue_id * IGB_CTX_NUM;

	igb_reset_tx_queue_stat(txq);
}
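
/*
 * After igb_reset_tx_queue() the software ring is circularly linked; e.g.
 * with 4 descriptors:
 *
 *	entry:    0  1  2  3
 *	next_id:  1  2  3  0
 *	last_id:  0  1  2  3
 *
 * and every descriptor reports DD, so the whole ring initially appears
 * free to eth_igb_xmit_pkts().
 */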
int
eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	const struct rte_memzone *tz;
	struct igb_tx_queue *txq;
	struct e1000_hw *hw;
	uint32_t size;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of transmit descriptors.
	 * It must not exceed hardware maximum, and must be a multiple
	 * of IGB_ALIGN.
	 */
	if (((nb_desc * sizeof(union e1000_adv_tx_desc)) % IGB_ALIGN) != 0 ||
	    (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
		return (-EINVAL);
	}

	/*
	 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
	 * driver.
	 */
	if (tx_conf->tx_free_thresh != 0)
		RTE_LOG(WARNING, PMD,
			"The tx_free_thresh parameter is not "
			"used for the 1G driver.\n");
	if (tx_conf->tx_rs_thresh != 0)
		RTE_LOG(WARNING, PMD,
			"The tx_rs_thresh parameter is not "
			"used for the 1G driver.\n");
	if (tx_conf->tx_thresh.wthresh == 0)
		RTE_LOG(WARNING, PMD,
			"To improve 1G driver performance, consider setting "
			"the TX WTHRESH value to 4, 8, or 16.\n");

	/* Free memory prior to re-allocation if needed */
	if (dev->data->tx_queues[queue_idx] != NULL)
		igb_tx_queue_release(dev->data->tx_queues[queue_idx]);

	/* First allocate the tx queue data structure */
	txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
			  CACHE_LINE_SIZE);
	if (txq == NULL)
		return (-ENOMEM);

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
	tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
				   size, socket_id);
	if (tz == NULL) {
		igb_tx_queue_release(txq);
		return (-ENOMEM);
	}

	txq->nb_tx_desc = nb_desc;
	txq->pthresh = tx_conf->tx_thresh.pthresh;
	txq->hthresh = tx_conf->tx_thresh.hthresh;
	txq->wthresh = tx_conf->tx_thresh.wthresh;
	txq->queue_id = queue_idx;
	txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	txq->port_id = dev->data->port_id;

	txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
	txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
	txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;

	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc("txq->sw_ring",
				   sizeof(struct igb_tx_entry) * nb_desc,
				   CACHE_LINE_SIZE);
	if (txq->sw_ring == NULL) {
		igb_tx_queue_release(txq);
		return (-ENOMEM);
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
		     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

	igb_reset_tx_queue(txq, dev);
	dev->tx_pkt_burst = eth_igb_xmit_pkts;
	dev->data->tx_queues[queue_idx] = txq;

	return (0);
}
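
/*
 * Illustrative application-side setup (values are only an example): 512
 * descriptors occupy 512 * 16 = 8192 bytes, a multiple of IGB_ALIGN, so
 * the validation above passes:
 *
 *	struct rte_eth_txconf txconf = {
 *		.tx_thresh = { .pthresh = 8, .hthresh = 1, .wthresh = 16 },
 *	};
 *	rte_eth_tx_queue_setup(port_id, 0, 512, socket_id, &txconf);
 */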
static void
igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
{
	unsigned i;

	if (rxq->sw_ring != NULL) {
		for (i = 0; i < rxq->nb_rx_desc; i++) {
			if (rxq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
				rxq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
igb_rx_queue_release(struct igb_rx_queue *rxq)
{
	if (rxq != NULL) {
		igb_rx_queue_release_mbufs(rxq);
		rte_free(rxq->sw_ring);
		rte_free(rxq);
	}
}

void
eth_igb_rx_queue_release(void *rxq)
{
	igb_rx_queue_release(rxq);
}

static void
igb_reset_rx_queue(struct igb_rx_queue *rxq)
{
	uint32_t i, size;

	/* Zero out HW ring memory */
	size = sizeof(union e1000_adv_rx_desc) * rxq->nb_rx_desc;
	for (i = 0; i < size; i++) {
		((volatile char *)rxq->rx_ring)[i] = 0;
	}

	rxq->rx_tail = 0;
	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
}
int
eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_rxconf *rx_conf,
			 struct rte_mempool *mp)
{
	const struct rte_memzone *rz;
	struct igb_rx_queue *rxq;
	struct e1000_hw *hw;
	unsigned int size;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of receive descriptors.
	 * It must not exceed hardware maximum, and must be a multiple
	 * of IGB_ALIGN.
	 */
	if (((nb_desc * sizeof(union e1000_adv_rx_desc)) % IGB_ALIGN) != 0 ||
	    (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
		return (-EINVAL);
	}

	/* Free memory prior to re-allocation if needed */
	if (dev->data->rx_queues[queue_idx] != NULL) {
		igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
		dev->data->rx_queues[queue_idx] = NULL;
	}

	/* First allocate the RX queue data structure. */
	rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
			  CACHE_LINE_SIZE);
	if (rxq == NULL)
		return (-ENOMEM);
	rxq->mb_pool = mp;
	rxq->nb_rx_desc = nb_desc;
	rxq->pthresh = rx_conf->rx_thresh.pthresh;
	rxq->hthresh = rx_conf->rx_thresh.hthresh;
	rxq->wthresh = rx_conf->rx_thresh.wthresh;
	rxq->drop_en = rx_conf->rx_drop_en;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	rxq->queue_id = queue_idx;
	rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	rxq->port_id = dev->data->port_id;
	rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
				  0 : ETHER_CRC_LEN);

	/*
	 * Allocate RX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
	rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
	if (rz == NULL) {
		igb_rx_queue_release(rxq);
		return (-ENOMEM);
	}
	rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
	rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
	rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
	rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;

	/* Allocate software ring. */
	rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
				   sizeof(struct igb_rx_entry) * nb_desc,
				   CACHE_LINE_SIZE);
	if (rxq->sw_ring == NULL) {
		igb_rx_queue_release(rxq);
		return (-ENOMEM);
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
		     rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);

	dev->data->rx_queues[queue_idx] = rxq;
	igb_reset_rx_queue(rxq);

	return 0;
}
uint32_t
eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
#define IGB_RXQ_SCAN_INTERVAL 4
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_queue *rxq;
	uint32_t desc = 0;

	if (rx_queue_id >= dev->data->nb_rx_queues) {
		PMD_RX_LOG(ERR, "Invalid RX queue id=%d\n", rx_queue_id);
		return 0;
	}

	rxq = dev->data->rx_queues[rx_queue_id];
	rxdp = &(rxq->rx_ring[rxq->rx_tail]);

	while ((desc < rxq->nb_rx_desc) &&
		(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
		desc += IGB_RXQ_SCAN_INTERVAL;
		rxdp += IGB_RXQ_SCAN_INTERVAL;
		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
			rxdp = &(rxq->rx_ring[rxq->rx_tail +
				desc - rxq->nb_rx_desc]);
	}

	return desc;
}
void
igb_dev_clear_queues(struct rte_eth_dev *dev)
{
	uint16_t i;
	struct igb_tx_queue *txq;
	struct igb_rx_queue *rxq;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		if (txq != NULL) {
			igb_tx_queue_release_mbufs(txq);
			igb_reset_tx_queue(txq, dev);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		if (rxq != NULL) {
			igb_rx_queue_release_mbufs(rxq);
			igb_reset_rx_queue(rxq);
		}
	}
}
/**
 * Receive Side Scaling (RSS).
 * See section 7.1.1.7 in the following document:
 *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
 *
 * Principles:
 * The source and destination IP addresses of the IP header and the source and
 * destination ports of TCP/UDP headers, if any, of received packets are hashed
 * against a configurable random key to compute a 32-bit RSS hash result.
 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
 * RSS output index which is used as the RX queue index where to store the
 * received packets.
 * The following output is supplied in the RX write-back descriptor:
 *     - 32-bit result of the Microsoft RSS hash function,
 *     - 4-bit RSS type field.
 */

/*
 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
 * Used as the default key.
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
static void
igb_rss_disable(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	uint32_t mrqc;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	mrqc = E1000_READ_REG(hw, E1000_MRQC);
	mrqc &= ~E1000_MRQC_ENABLE_MASK;
	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
}
static void
igb_rss_configure(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	uint8_t *hash_key;
	uint32_t rss_key;
	uint32_t mrqc;
	uint32_t shift;
	uint16_t rss_hf;
	uint16_t i;
	union e1000_reta {
		uint32_t dword;
		uint8_t  bytes[4];
	} reta;
	uint8_t q_idx;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	if (rss_hf == 0) /* Disable RSS. */ {
		igb_rss_disable(dev);
		return;
	}
	hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	if (hash_key == NULL)
		hash_key = rss_intel_key; /* Default hash key. */

	/* Fill in RSS hash key. */
	for (i = 0; i < 10; i++) {
		rss_key  = hash_key[(i * 4)];
		rss_key |= hash_key[(i * 4) + 1] << 8;
		rss_key |= hash_key[(i * 4) + 2] << 16;
		rss_key |= hash_key[(i * 4) + 3] << 24;
		E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
	}

	/* Fill in redirection table. */
	shift = (hw->mac.type == e1000_82575) ? 6 : 0;
	for (i = 0; i < 128; i++) {
		q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
				   i % dev->data->nb_rx_queues : 0);
		reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
		if ((i & 3) == 3)
			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
	}

	/* Set configured hashing functions in MRQC register. */
	mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
	if (rss_hf & ETH_RSS_IPV4)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
	if (rss_hf & ETH_RSS_IPV4_TCP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
	if (rss_hf & ETH_RSS_IPV6)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
	if (rss_hf & ETH_RSS_IPV6_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
	if (rss_hf & ETH_RSS_IPV6_TCP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
	if (rss_hf & ETH_RSS_IPV6_TCP_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
	if (rss_hf & ETH_RSS_IPV4_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
	if (rss_hf & ETH_RSS_IPV6_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
	if (rss_hf & ETH_RSS_IPV6_UDP_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
}
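
/*
 * RETA filling above is round-robin; e.g. with 4 RX queues the 128 entries
 * read 0,1,2,3,0,1,2,3,... so the 7 hash LSBs spread flows evenly across
 * queues. On the 82575 the queue index must additionally be shifted left
 * by 6 (see 'shift' above).
 */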
/*********************************************************************
 *
 *  Enable receive unit.
 *
 **********************************************************************/
static int
igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
{
	struct igb_rx_entry *rxe = rxq->sw_ring;
	uint64_t dma_addr;
	unsigned i;

	/* Initialize software ring entries. */
	for (i = 0; i < rxq->nb_rx_desc; i++) {
		volatile union e1000_adv_rx_desc *rxd;
		struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);

		if (mbuf == NULL) {
			PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
				"queue_id=%hu\n", rxq->queue_id);
			igb_rx_queue_release(rxq);
			return (-ENOMEM);
		}
		dma_addr =
			rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
		rxd = &rxq->rx_ring[i];
		rxd->read.hdr_addr = dma_addr;
		rxd->read.pkt_addr = dma_addr;
		rxe[i].mbuf = mbuf;
	}

	return 0;
}
#define E1000_MRQC_DEF_Q_SHIFT	(3)

static void
igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw =
		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	uint32_t mrqc;

	if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
		/*
		 * SRIOV active scheme
		 * FIXME if support RSS together with VMDq & SRIOV
		 */
		mrqc = E1000_MRQC_ENABLE_VMDQ;
		/* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
		mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
	} else if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
		/*
		 * SRIOV inactive scheme
		 */
		if (dev->data->nb_rx_queues > 1)
			igb_rss_configure(dev);
		else
			igb_rss_disable(dev);
	}
}
int
eth_igb_rx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_rx_queue *rxq;
	struct rte_pktmbuf_pool_private *mbp_priv;
	uint32_t rctl;
	uint32_t rxcsum;
	uint32_t srrctl;
	uint16_t buf_size;
	uint16_t rctl_bsize;
	uint16_t i;
	int ret;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring.
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	 * Configure support of jumbo frames, if any.
	 */
	if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
		rctl |= E1000_RCTL_LPE;

		/*
		 * Set maximum packet length by default, and might be updated
		 * together with enabling/disabling dual VLAN.
		 */
		E1000_WRITE_REG(hw, E1000_RLPML,
			dev->data->dev_conf.rxmode.max_rx_pkt_len +
				VLAN_TAG_SIZE);
	} else
		rctl &= ~E1000_RCTL_LPE;

	/* Configure and enable each RX queue. */
	rctl_bsize = 0;
	dev->rx_pkt_burst = eth_igb_recv_pkts;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		uint64_t bus_addr;
		uint32_t rxdctl;

		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings and set up queue */
		ret = igb_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/*
		 * Reset crc_len in case it was changed after queue setup by a
		 * call to configure.
		 */
		rxq->crc_len =
			(uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
				  0 : ETHER_CRC_LEN);

		bus_addr = rxq->rx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
				rxq->nb_rx_desc *
				sizeof(union e1000_adv_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);

		srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/*
		 * Configure RX buffer size.
		 */
		mbp_priv = (struct rte_pktmbuf_pool_private *)
			((char *)rxq->mb_pool + sizeof(struct rte_mempool));
		buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
				       RTE_PKTMBUF_HEADROOM);
		if (buf_size >= 1024) {
			/*
			 * Configure the BSIZEPACKET field of the SRRCTL
			 * register of the queue.
			 * Value is in 1 KB resolution, from 1 KB to 127 KB.
			 * If this field is equal to 0b, then RCTL.BSIZE
			 * determines the RX packet buffer size.
			 */
			srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
				   E1000_SRRCTL_BSIZEPKT_MASK);
			buf_size = (uint16_t) ((srrctl &
						E1000_SRRCTL_BSIZEPKT_MASK) <<
					       E1000_SRRCTL_BSIZEPKT_SHIFT);

			/* It adds dual VLAN length for supporting dual VLAN */
			if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
						2 * VLAN_TAG_SIZE) > buf_size){
				dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
				dev->data->scattered_rx = 1;
			}
		} else {
			/*
			 * Use BSIZE field of the device RCTL register.
			 */
			if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
				rctl_bsize = buf_size;
			dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
			dev->data->scattered_rx = 1;
		}

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= E1000_SRRCTL_DROP_EN;

		E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);

		/* Enable this RX queue. */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= (rxq->pthresh & 0x1F);
		rxdctl |= ((rxq->hthresh & 0x1F) << 8);
		rxdctl |= ((rxq->wthresh & 0x1F) << 16);
		E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
	}

	/*
	 * Setup BSIZE field of RCTL register, if needed.
	 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
	 * register, since the code above configures the SRRCTL register of
	 * the RX queue in such a case.
	 * All configurable sizes are:
	 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
	 *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
	 *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
	 *  2048: rctl |= E1000_RCTL_SZ_2048;
	 *  1024: rctl |= E1000_RCTL_SZ_1024;
	 *   512: rctl |= E1000_RCTL_SZ_512;
	 *   256: rctl |= E1000_RCTL_SZ_256;
	 */
	if (rctl_bsize > 0) {
		if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
			rctl |= E1000_RCTL_SZ_512;
		else /* 256 <= buf_size < 512 - use 256 */
			rctl |= E1000_RCTL_SZ_256;
	}

	/*
	 * Configure RSS if device configured with multiple RX queues.
	 */
	igb_dev_mq_rx_configure(dev);

	/*
	 * Setup the Checksum Register.
	 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	/* Enable both L3/L4 rx checksum offload */
	if (dev->data->dev_conf.rxmode.hw_ip_checksum)
		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
	else
		rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

	/* Setup the Receive Control Register. */
	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
		rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */

		/* set STRCRC bit in all queues for Powerville/Springville */
		if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i210) {
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				uint32_t dvmolr;

				rxq = dev->data->rx_queues[i];
				dvmolr = E1000_READ_REG(hw,
						E1000_DVMOLR(rxq->reg_idx));
				dvmolr |= E1000_DVMOLR_STRCRC;
				E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx),
						dvmolr);
			}
		}
	} else {
		rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */

		/* clear STRCRC bit in all queues for Powerville/Springville */
		if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i210) {
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				uint32_t dvmolr;

				rxq = dev->data->rx_queues[i];
				dvmolr = E1000_READ_REG(hw,
						E1000_DVMOLR(rxq->reg_idx));
				dvmolr &= ~E1000_DVMOLR_STRCRC;
				E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx),
						dvmolr);
			}
		}
	}

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off. */
	rctl &= ~E1000_RCTL_VFE;
	/* Don't store bad packets. */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Receives. */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers.
	 * This needs to be done after enable.
	 */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
		E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx),
				rxq->nb_rx_desc - 1);
	}

	return 0;
}
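
/*
 * Worked example for the SRRCTL.BSIZEPACKET math above (assuming the
 * usual 128-byte RTE_PKTMBUF_HEADROOM and the field's 1 KB granularity):
 * a 2048-byte mbuf data room gives buf_size = 1920, programmed as
 * 1920 >> 10 = 1, i.e. an effective 1024-byte RX buffer. A standard
 * 1518-byte max_rx_pkt_len plus two VLAN tags (1526) exceeds 1024, so the
 * scattered receive function is selected for that queue.
 */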
/*********************************************************************
 *
 *  Enable transmit unit.
 *
 **********************************************************************/
void
eth_igb_tx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_tx_queue *txq;
	uint32_t tctl;
	uint32_t txdctl;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		uint64_t bus_addr;

		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;

		E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
				txq->nb_tx_desc *
				sizeof(union e1000_adv_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers. */
		E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
		E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);

		/* Setup Transmit threshold registers. */
		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
		txdctl |= txq->pthresh & 0x1F;
		txdctl |= ((txq->hthresh & 0x1F) << 8);
		txdctl |= ((txq->wthresh & 0x1F) << 16);
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
	}

	/* Program the Transmit Control Register. */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	e1000_config_collision_dist(hw);

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}
/*********************************************************************
 *
 *  Enable VF receive unit.
 *
 **********************************************************************/
int
eth_igbvf_rx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_rx_queue *rxq;
	struct rte_pktmbuf_pool_private *mbp_priv;
	uint32_t srrctl;
	uint16_t buf_size;
	uint16_t rctl_bsize;
	uint16_t i;
	int ret;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Configure and enable each RX queue. */
	rctl_bsize = 0;
	dev->rx_pkt_burst = eth_igb_recv_pkts;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		uint64_t bus_addr;
		uint32_t rxdctl;

		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings and set up queue */
		ret = igb_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		bus_addr = rxq->rx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_RDLEN(i),
				rxq->nb_rx_desc *
				sizeof(union e1000_adv_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(i),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);

		srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/*
		 * Configure RX buffer size.
		 */
		mbp_priv = (struct rte_pktmbuf_pool_private *)
			((char *)rxq->mb_pool + sizeof(struct rte_mempool));
		buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
				       RTE_PKTMBUF_HEADROOM);
		if (buf_size >= 1024) {
			/*
			 * Configure the BSIZEPACKET field of the SRRCTL
			 * register of the queue.
			 * Value is in 1 KB resolution, from 1 KB to 127 KB.
			 * If this field is equal to 0b, then RCTL.BSIZE
			 * determines the RX packet buffer size.
			 */
			srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
				   E1000_SRRCTL_BSIZEPKT_MASK);
			buf_size = (uint16_t) ((srrctl &
						E1000_SRRCTL_BSIZEPKT_MASK) <<
					       E1000_SRRCTL_BSIZEPKT_SHIFT);

			/* It adds dual VLAN length for supporting dual VLAN */
			if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
						2 * VLAN_TAG_SIZE) > buf_size){
				dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
				dev->data->scattered_rx = 1;
			}
		} else {
			/*
			 * Use BSIZE field of the device RCTL register.
			 */
			if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
				rctl_bsize = buf_size;
			dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
			dev->data->scattered_rx = 1;
		}

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= E1000_SRRCTL_DROP_EN;

		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);

		/* Enable this RX queue. */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= (rxq->pthresh & 0x1F);
		rxdctl |= ((rxq->hthresh & 0x1F) << 8);
		if (hw->mac.type == e1000_82576) {
			/*
			 * Workaround of 82576 VF Erratum:
			 * force set WTHRESH to 1
			 * to avoid Write-Back not triggered sometimes.
			 */
			rxdctl |= 0x10000;
			PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !\n");
		} else
			rxdctl |= ((rxq->wthresh & 0x1F) << 16);
		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
	}

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers.
	 * This needs to be done after enable.
	 */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
	}

	return 0;
}
/*********************************************************************
 *
 *  Enable VF transmit unit.
 *
 **********************************************************************/
void
eth_igbvf_tx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_tx_queue *txq;
	uint32_t txdctl;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		uint64_t bus_addr;

		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
				txq->nb_tx_desc *
				sizeof(union e1000_adv_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers. */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		/* Setup Transmit threshold registers. */
		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
		txdctl |= txq->pthresh & 0x1F;
		txdctl |= ((txq->hthresh & 0x1F) << 8);
		if (hw->mac.type == e1000_82576) {
			/*
			 * Workaround of 82576 VF Erratum:
			 * force set WTHRESH to 1
			 * to avoid Write-Back not triggered sometimes.
			 */
			txdctl |= 0x10000;
			PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !\n");
		} else
			txdctl |= ((txq->wthresh & 0x1F) << 16);
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}
}