4 * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
45 #include <rte_interrupts.h>
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
49 #include <rte_debug.h>
51 #include <rte_memory.h>
52 #include <rte_memcpy.h>
53 #include <rte_memzone.h>
54 #include <rte_launch.h>
55 #include <rte_tailq.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
71 #include <rte_string_fns.h>
73 #include "e1000_logs.h"
74 #include "e1000/e1000_api.h"
75 #include "e1000_ethdev.h"
77 static inline struct rte_mbuf *
78 rte_rxmbuf_alloc(struct rte_mempool *mp)
82 m = __rte_mbuf_raw_alloc(mp);
83 __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
87 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
88 (uint64_t) ((mb)->buf_physaddr + \
89 (uint64_t) ((char *)((mb)->pkt.data) - \
90 (char *)(mb)->buf_addr))
92 #define RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mb) \
93 (uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
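/*
 * Illustrative sketch (not part of the original driver): the two macros above
 * translate an mbuf into the bus address programmed into a descriptor. The
 * hypothetical helper below is an equivalent, spelled-out form of
 * RTE_MBUF_DATA_DMA_ADDR() for the mbuf layout assumed here
 * (buf_physaddr/buf_addr pair plus pkt.data).
 */
static inline uint64_t
igb_example_data_dma_addr(struct rte_mbuf *mb)
{
	/* byte offset of the packet data inside the mbuf data buffer */
	uint64_t data_off = (uint64_t)((char *)mb->pkt.data -
				       (char *)mb->buf_addr);

	/* bus address of the buffer plus the offset of the data inside it */
	return mb->buf_physaddr + data_off;
}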
96 * Structure associated with each descriptor of the RX ring of a RX queue.
99 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
103 * Structure associated with each descriptor of the TX ring of a TX queue.
105 struct igb_tx_entry {
106 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
107 uint16_t next_id; /**< Index of next descriptor in ring. */
108 uint16_t last_id; /**< Index of last scattered descriptor. */
112 * Structure associated with each RX queue.
114 struct igb_rx_queue {
115 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
116 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
117 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
118 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
119 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
120 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
121 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
122 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
123 uint16_t nb_rx_desc; /**< number of RX descriptors. */
124 uint16_t rx_tail; /**< current value of RDT register. */
125 uint16_t nb_rx_hold; /**< number of held free RX desc. */
126 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
127 uint16_t queue_id; /**< RX queue index. */
128 uint16_t reg_idx; /**< RX queue register index. */
129 uint8_t port_id; /**< Device port identifier. */
130 uint8_t pthresh; /**< Prefetch threshold register. */
131 uint8_t hthresh; /**< Host threshold register. */
132 uint8_t wthresh; /**< Write-back threshold register. */
133 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
134 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
138 * Hardware context number
140 enum igb_advctx_num {
141 IGB_CTX_0 = 0, /**< CTX0 */
142 IGB_CTX_1 = 1, /**< CTX1 */
143 IGB_CTX_NUM = 2, /**< CTX_NUM */
147 * Structure to check if a new context needs to be built
149 struct igb_advctx_info {
150 uint16_t flags; /**< ol_flags related to context build. */
151 uint32_t cmp_mask; /**< compare mask for vlan_macip_lens */
152 union rte_vlan_macip vlan_macip_lens; /**< vlan, mac & ip length. */
156 * Structure associated with each TX queue.
158 struct igb_tx_queue {
159 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
160 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
161 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
162 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
163 uint32_t txd_type; /**< Device-specific TXD type */
164 uint16_t nb_tx_desc; /**< number of TX descriptors. */
165 uint16_t tx_tail; /**< Current value of TDT register. */
167 /**< Index of first used TX descriptor. */
168 uint16_t queue_id; /**< TX queue index. */
169 uint16_t reg_idx; /**< TX queue register index. */
170 uint8_t port_id; /**< Device port identifier. */
171 uint8_t pthresh; /**< Prefetch threshold register. */
172 uint8_t hthresh; /**< Host threshold register. */
173 uint8_t wthresh; /**< Write-back threshold register. */
175 /**< Current used hardware descriptor. */
177 /**< Start context position for transmit queue. */
178 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
179 /**< Hardware context history.*/
183 #define RTE_PMD_USE_PREFETCH
186 #ifdef RTE_PMD_USE_PREFETCH
187 #define rte_igb_prefetch(p) rte_prefetch0(p)
189 #define rte_igb_prefetch(p) do {} while(0)
192 #ifdef RTE_PMD_PACKET_PREFETCH
193 #define rte_packet_prefetch(p) rte_prefetch1(p)
195 #define rte_packet_prefetch(p) do {} while(0)
198 /*********************************************************************
202 **********************************************************************/
205 * Advanced context descriptors are almost the same between igb and ixgbe.
206 * This is kept as a separate function, as there may be an optimization opportunity here:
207 * rework is required to go with the pre-defined values.
211 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
212 volatile struct e1000_adv_tx_context_desc *ctx_txd,
213 uint16_t ol_flags, uint32_t vlan_macip_lens)
215 uint32_t type_tucmd_mlhl;
216 uint32_t mss_l4len_idx;
217 uint32_t ctx_idx, ctx_curr;
220 ctx_curr = txq->ctx_curr;
221 ctx_idx = ctx_curr + txq->ctx_start;
226 if (ol_flags & PKT_TX_VLAN_PKT) {
227 cmp_mask |= TX_VLAN_CMP_MASK;
230 if (ol_flags & PKT_TX_IP_CKSUM) {
231 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
232 cmp_mask |= TX_MAC_LEN_CMP_MASK;
235 /* Specify which HW CTX to upload. */
236 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
237 switch (ol_flags & PKT_TX_L4_MASK) {
238 case PKT_TX_UDP_CKSUM:
239 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
240 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
241 mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
242 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
244 case PKT_TX_TCP_CKSUM:
245 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
246 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
247 mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
248 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
250 case PKT_TX_SCTP_CKSUM:
251 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
252 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
253 mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
254 cmp_mask |= TX_MACIP_LEN_CMP_MASK;
257 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
258 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
262 txq->ctx_cache[ctx_curr].flags = ol_flags;
263 txq->ctx_cache[ctx_curr].cmp_mask = cmp_mask;
264 txq->ctx_cache[ctx_curr].vlan_macip_lens.data =
265 vlan_macip_lens & cmp_mask;
267 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
268 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
269 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
270 ctx_txd->seqnum_seed = 0;
274 * Check which hardware context can be used. Use the existing match
275 * or create a new context descriptor.
277 static inline uint32_t
278 what_advctx_update(struct igb_tx_queue *txq, uint16_t flags,
279 uint32_t vlan_macip_lens)
281 /* If match with the current context */
282 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
283 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
284 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
285 return txq->ctx_curr;
288 /* If match with the second context: toggle to the other cached entry first. */
txq->ctx_curr ^= 1;
290 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
291 (txq->ctx_cache[txq->ctx_curr].vlan_macip_lens.data ==
292 (txq->ctx_cache[txq->ctx_curr].cmp_mask & vlan_macip_lens)))) {
293 return txq->ctx_curr;
296 /* Mismatch: neither cached context matches, a new one must be built. */
297 return (IGB_CTX_NUM);
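/*
 * Illustrative sketch (hypothetical helper, mirroring how eth_igb_xmit_pkts()
 * below uses the cache): query the two-entry context cache with a packet's
 * offload parameters. Only when IGB_CTX_NUM comes back must the transmit path
 * spend one extra ring slot on a context descriptor before the data
 * descriptors.
 */
static inline uint32_t
igb_example_needs_new_ctx(struct igb_tx_queue *txq, uint16_t tx_ol_req,
		uint32_t vlan_macip_lens)
{
	uint32_t ctx = what_advctx_update(txq, tx_ol_req, vlan_macip_lens);

	/* non-zero -> a new context descriptor must be built for this packet */
	return (uint32_t)(ctx == IGB_CTX_NUM);
}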
300 static inline uint32_t
301 tx_desc_cksum_flags_to_olinfo(uint16_t ol_flags)
303 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
304 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
307 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
308 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
312 static inline uint32_t
313 tx_desc_vlan_flags_to_cmdtype(uint16_t ol_flags)
315 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
316 return vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
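/*
 * Illustrative sketch (hypothetical helper, assumed offload request): for a
 * TCP packet asking for IP and L4 checksum offload plus VLAN insertion, the
 * two helpers above contribute the VLE command bit and the IXSM/TXSM option
 * bits that eth_igb_xmit_pkts() merges into each data descriptor.
 */
static inline void
igb_example_offload_bits(uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
	uint16_t ol_flags = (uint16_t)(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
			PKT_TX_VLAN_PKT);

	*cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);  /* VLE */
	*olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags); /* IXSM | TXSM */
}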
320 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
323 struct igb_tx_queue *txq;
324 struct igb_tx_entry *sw_ring;
325 struct igb_tx_entry *txe, *txn;
326 volatile union e1000_adv_tx_desc *txr;
327 volatile union e1000_adv_tx_desc *txd;
328 struct rte_mbuf *tx_pkt;
329 struct rte_mbuf *m_seg;
330 uint64_t buf_dma_addr;
331 uint32_t olinfo_status;
332 uint32_t cmd_type_len;
341 uint32_t new_ctx = 0;
343 uint32_t vlan_macip_lens;
346 sw_ring = txq->sw_ring;
348 tx_id = txq->tx_tail;
349 txe = &sw_ring[tx_id];
351 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
353 pkt_len = tx_pkt->pkt.pkt_len;
355 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
358 * The number of descriptors that must be allocated for a
359 * packet is the number of segments of that packet, plus 1
360 * Context Descriptor for the VLAN Tag Identifier, if any.
361 * Determine the last TX descriptor to allocate in the TX ring
362 * for the packet, starting from the current position (tx_id)
365 tx_last = (uint16_t) (tx_id + tx_pkt->pkt.nb_segs - 1);
367 ol_flags = tx_pkt->ol_flags;
368 vlan_macip_lens = tx_pkt->pkt.vlan_macip.data;
369 tx_ol_req = (uint16_t)(ol_flags & PKT_TX_OFFLOAD_MASK);
371 /* Check whether a Context Descriptor needs to be built. */
373 ctx = what_advctx_update(txq, tx_ol_req,
375 /* Only allocate a context descriptor if required. */
376 new_ctx = (ctx == IGB_CTX_NUM);
378 tx_last = (uint16_t) (tx_last + new_ctx);
380 if (tx_last >= txq->nb_tx_desc)
381 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
383 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
384 " tx_first=%u tx_last=%u\n",
385 (unsigned) txq->port_id,
386 (unsigned) txq->queue_id,
392 * Check if there are enough free descriptors in the TX ring
393 * to transmit the next packet.
394 * This operation is based on the two following rules:
396 * 1- Only check that the last needed TX descriptor can be
397 * allocated (by construction, if that descriptor is free,
398 * all intermediate ones are also free).
400 * For this purpose, the index of the last TX descriptor
401 * used for a packet (the "last descriptor" of a packet)
402 * is recorded in the TX entries (the last one included)
403 * that are associated with all TX descriptors allocated
406 * 2- Avoid to allocate the last free TX descriptor of the
407 * ring, in order to never set the TDT register with the
408 * same value stored in parallel by the NIC in the TDH
409 * register, which makes the TX engine of the NIC enter
410 * in a deadlock situation.
412 * By extension, avoid to allocate a free descriptor that
413 * belongs to the last set of free descriptors allocated
414 * to the same packet previously transmitted.
418 * The "last descriptor" of the previously sent packet, if any,
419 * which used the last descriptor to allocate.
421 tx_end = sw_ring[tx_last].last_id;
424 * The next descriptor following that "last descriptor" in the
427 tx_end = sw_ring[tx_end].next_id;
430 * The "last descriptor" associated with that next descriptor.
432 tx_end = sw_ring[tx_end].last_id;
435 * Check that this descriptor is free.
437 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
444 * Set common flags of all TX Data Descriptors.
446 * The following bits must be set in all Data Descriptors:
447 * - E1000_ADVTXD_DTYP_DATA
448 * - E1000_ADVTXD_DCMD_DEXT
450 * The following bits must be set in the first Data Descriptor
451 * and are ignored in the other ones:
452 * - E1000_ADVTXD_DCMD_IFCS
453 * - E1000_ADVTXD_MAC_1588
454 * - E1000_ADVTXD_DCMD_VLE
456 * The following bits must only be set in the last Data
458 * - E1000_TXD_CMD_EOP
460 * The following bits can be set in any Data Descriptor, but
461 * are only set in the last Data Descriptor:
464 cmd_type_len = txq->txd_type |
465 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
466 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
467 #if defined(RTE_LIBRTE_IEEE1588)
468 if (ol_flags & PKT_TX_IEEE1588_TMST)
469 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
472 /* Setup TX Advanced context descriptor if required */
474 volatile struct e1000_adv_tx_context_desc *
477 ctx_txd = (volatile struct
478 e1000_adv_tx_context_desc *)
481 txn = &sw_ring[txe->next_id];
482 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
484 if (txe->mbuf != NULL) {
485 rte_pktmbuf_free_seg(txe->mbuf);
489 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
492 txe->last_id = tx_last;
493 tx_id = txe->next_id;
497 /* Setup the TX Advanced Data Descriptor */
498 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(ol_flags);
499 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
500 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
505 txn = &sw_ring[txe->next_id];
508 if (txe->mbuf != NULL)
509 rte_pktmbuf_free_seg(txe->mbuf);
513 * Set up transmit descriptor.
515 slen = (uint16_t) m_seg->pkt.data_len;
516 buf_dma_addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
517 txd->read.buffer_addr =
518 rte_cpu_to_le_64(buf_dma_addr);
519 txd->read.cmd_type_len =
520 rte_cpu_to_le_32(cmd_type_len | slen);
521 txd->read.olinfo_status =
522 rte_cpu_to_le_32(olinfo_status);
523 txe->last_id = tx_last;
524 tx_id = txe->next_id;
526 m_seg = m_seg->pkt.next;
527 } while (m_seg != NULL);
530 * The last packet data descriptor needs End Of Packet (EOP)
531 * and Report Status (RS).
533 txd->read.cmd_type_len |=
534 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
540 * Set the Transmit Descriptor Tail (TDT).
542 E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
543 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
544 (unsigned) txq->port_id, (unsigned) txq->queue_id,
545 (unsigned) tx_id, (unsigned) nb_tx);
546 txq->tx_tail = tx_id;
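/*
 * Illustrative sketch (application side, hypothetical names):
 * eth_igb_xmit_pkts() is reached through the generic rte_eth_tx_burst() call.
 * A typical caller retries until the whole burst has been accepted by the
 * TX ring.
 */
static inline void
igb_example_send_burst(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = 0;

	while (sent < nb_pkts)
		sent = (uint16_t)(sent + rte_eth_tx_burst(port_id, queue_id,
				&pkts[sent], (uint16_t)(nb_pkts - sent)));
}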
551 /*********************************************************************
555 **********************************************************************/
556 static inline uint16_t
557 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
561 static uint16_t ip_pkt_types_map[16] = {
562 0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
563 PKT_RX_IPV6_HDR, 0, 0, 0,
564 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
565 PKT_RX_IPV6_HDR_EXT, 0, 0, 0,
568 #if defined(RTE_LIBRTE_IEEE1588)
569 static uint32_t ip_pkt_etqf_map[8] = {
570 0, 0, 0, PKT_RX_IEEE1588_PTP,
574 pkt_flags = (uint16_t)((hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ?
575 ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07] :
576 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
578 pkt_flags = (uint16_t)((hl_tp_rs & E1000_RXDADV_PKTTYPE_ETQF) ? 0 :
579 ip_pkt_types_map[(hl_tp_rs >> 4) & 0x0F]);
581 return (uint16_t)(pkt_flags | (((hl_tp_rs & 0x0F) == 0) ?
582 0 : PKT_RX_RSS_HASH));
585 static inline uint16_t
586 rx_desc_status_to_pkt_flags(uint32_t rx_status)
590 /* Check if VLAN present */
591 pkt_flags = (uint16_t)((rx_status & E1000_RXD_STAT_VP) ?
592 PKT_RX_VLAN_PKT : 0);
594 #if defined(RTE_LIBRTE_IEEE1588)
595 if (rx_status & E1000_RXD_STAT_TMST)
596 pkt_flags = (uint16_t)(pkt_flags | PKT_RX_IEEE1588_TMST);
601 static inline uint16_t
602 rx_desc_error_to_pkt_flags(uint32_t rx_status)
605 * Bit 30: IPE, IPv4 checksum error
606 * Bit 29: L4I, L4I integrity error
609 static uint16_t error_to_pkt_flags_map[4] = {
610 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
611 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
613 return error_to_pkt_flags_map[(rx_status >>
614 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
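/*
 * Worked example (reading of the table above): the two checksum error bits
 * form a 2-bit index after the shift and mask, so index 0 reports no error,
 * index 1 reports PKT_RX_L4_CKSUM_BAD, index 2 reports PKT_RX_IP_CKSUM_BAD,
 * and index 3 reports both flags at once.
 */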
618 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
621 struct igb_rx_queue *rxq;
622 volatile union e1000_adv_rx_desc *rx_ring;
623 volatile union e1000_adv_rx_desc *rxdp;
624 struct igb_rx_entry *sw_ring;
625 struct igb_rx_entry *rxe;
626 struct rte_mbuf *rxm;
627 struct rte_mbuf *nmb;
628 union e1000_adv_rx_desc rxd;
631 uint32_t hlen_type_rss;
641 rx_id = rxq->rx_tail;
642 rx_ring = rxq->rx_ring;
643 sw_ring = rxq->sw_ring;
644 while (nb_rx < nb_pkts) {
646 * The order of operations here is important as the DD status
647 * bit must not be read after any other descriptor fields.
648 * rx_ring and rxdp are pointing to volatile data so the order
649 * of accesses cannot be reordered by the compiler. If they were
650 * not volatile, they could be reordered which could lead to
651 * using invalid descriptor fields when read from rxd.
653 rxdp = &rx_ring[rx_id];
654 staterr = rxdp->wb.upper.status_error;
655 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
662 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
663 * likely to be invalid and to be dropped by the various
664 * validation checks performed by the network stack.
666 * Allocate a new mbuf to replenish the RX ring descriptor.
667 * If the allocation fails:
668 * - arrange for that RX descriptor to be the first one
669 * being parsed the next time the receive function is
670 * invoked [on the same queue].
672 * - Stop parsing the RX ring and return immediately.
674 * This policy does not drop the packet received in the RX
675 * descriptor for which the allocation of a new mbuf failed.
676 * Thus, it allows that packet to be retrieved later if
677 * mbufs have been freed in the meantime.
678 * As a side effect, holding RX descriptors instead of
679 * systematically giving them back to the NIC may lead to
680 * RX ring exhaustion situations.
681 * However, the NIC can gracefully prevent such situations
682 * from happening by sending specific "back-pressure" flow control
683 * frames to its peer(s).
685 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
686 "staterr=0x%x pkt_len=%u\n",
687 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
688 (unsigned) rx_id, (unsigned) staterr,
689 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
691 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
693 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
694 "queue_id=%u\n", (unsigned) rxq->port_id,
695 (unsigned) rxq->queue_id);
696 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
701 rxe = &sw_ring[rx_id];
703 if (rx_id == rxq->nb_rx_desc)
706 /* Prefetch next mbuf while processing current one. */
707 rte_igb_prefetch(sw_ring[rx_id].mbuf);
710 * When next RX descriptor is on a cache-line boundary,
711 * prefetch the next 4 RX descriptors and the next 8 pointers
714 if ((rx_id & 0x3) == 0) {
715 rte_igb_prefetch(&rx_ring[rx_id]);
716 rte_igb_prefetch(&sw_ring[rx_id]);
722 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
723 rxdp->read.hdr_addr = dma_addr;
724 rxdp->read.pkt_addr = dma_addr;
727 * Initialize the returned mbuf.
728 * 1) setup generic mbuf fields:
729 * - number of segments,
732 * - RX port identifier.
733 * 2) integrate hardware offload data, if any:
735 * - IP checksum flag,
736 * - VLAN TCI, if any,
739 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
741 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
742 rte_packet_prefetch(rxm->pkt.data);
743 rxm->pkt.nb_segs = 1;
744 rxm->pkt.next = NULL;
745 rxm->pkt.pkt_len = pkt_len;
746 rxm->pkt.data_len = pkt_len;
747 rxm->pkt.in_port = rxq->port_id;
749 rxm->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
750 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
751 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
752 rxm->pkt.vlan_macip.f.vlan_tci =
753 rte_le_to_cpu_16(rxd.wb.upper.vlan);
755 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
756 pkt_flags = (uint16_t)(pkt_flags |
757 rx_desc_status_to_pkt_flags(staterr));
758 pkt_flags = (uint16_t)(pkt_flags |
759 rx_desc_error_to_pkt_flags(staterr));
760 rxm->ol_flags = pkt_flags;
763 * Store the mbuf address into the next entry of the array
764 * of returned packets.
766 rx_pkts[nb_rx++] = rxm;
768 rxq->rx_tail = rx_id;
771 * If the number of free RX descriptors is greater than the RX free
772 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
774 * Update the RDT with the value of the last processed RX descriptor
775 * minus 1, to guarantee that the RDT register is never equal to the
776 * RDH register, which creates a "full" ring situation from the
777 * hardware point of view...
779 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
780 if (nb_hold > rxq->rx_free_thresh) {
781 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
782 "nb_hold=%u nb_rx=%u\n",
783 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
784 (unsigned) rx_id, (unsigned) nb_hold,
786 rx_id = (uint16_t) ((rx_id == 0) ?
787 (rxq->nb_rx_desc - 1) : (rx_id - 1));
788 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
791 rxq->nb_rx_hold = nb_hold;
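/*
 * Illustrative sketch (application side, hypothetical names):
 * eth_igb_recv_pkts() is invoked through rte_eth_rx_burst(). A minimal
 * polling loop receives a burst and releases the mbufs once done with them.
 */
static inline void
igb_example_poll_queue(uint8_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx, i;

	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
	for (i = 0; i < nb_rx; i++) {
		/* a real application would process the packet here */
		rte_pktmbuf_free(pkts[i]);
	}
}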
796 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
799 struct igb_rx_queue *rxq;
800 volatile union e1000_adv_rx_desc *rx_ring;
801 volatile union e1000_adv_rx_desc *rxdp;
802 struct igb_rx_entry *sw_ring;
803 struct igb_rx_entry *rxe;
804 struct rte_mbuf *first_seg;
805 struct rte_mbuf *last_seg;
806 struct rte_mbuf *rxm;
807 struct rte_mbuf *nmb;
808 union e1000_adv_rx_desc rxd;
809 uint64_t dma; /* Physical address of mbuf data buffer */
811 uint32_t hlen_type_rss;
821 rx_id = rxq->rx_tail;
822 rx_ring = rxq->rx_ring;
823 sw_ring = rxq->sw_ring;
826 * Retrieve RX context of current packet, if any.
828 first_seg = rxq->pkt_first_seg;
829 last_seg = rxq->pkt_last_seg;
831 while (nb_rx < nb_pkts) {
834 * The order of operations here is important as the DD status
835 * bit must not be read after any other descriptor fields.
836 * rx_ring and rxdp are pointing to volatile data so the order
837 * of accesses cannot be reordered by the compiler. If they were
838 * not volatile, they could be reordered which could lead to
839 * using invalid descriptor fields when read from rxd.
841 rxdp = &rx_ring[rx_id];
842 staterr = rxdp->wb.upper.status_error;
843 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
850 * Allocate a new mbuf to replenish the RX ring descriptor.
851 * If the allocation fails:
852 * - arrange for that RX descriptor to be the first one
853 * being parsed the next time the receive function is
854 * invoked [on the same queue].
856 * - Stop parsing the RX ring and return immediately.
858 * This policy does not drop the packet received in the RX
859 * descriptor for which the allocation of a new mbuf failed.
860 * Thus, it allows that packet to be retrieved later if
861 * mbufs have been freed in the meantime.
862 * As a side effect, holding RX descriptors instead of
863 * systematically giving them back to the NIC may lead to
864 * RX ring exhaustion situations.
865 * However, the NIC can gracefully prevent such situations
866 * from happening by sending specific "back-pressure" flow control
867 * frames to its peer(s).
869 PMD_RX_LOG(DEBUG, "\nport_id=%u queue_id=%u rx_id=%u "
870 "staterr=0x%x data_len=%u\n",
871 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
872 (unsigned) rx_id, (unsigned) staterr,
873 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
875 nmb = rte_rxmbuf_alloc(rxq->mb_pool);
877 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
878 "queue_id=%u\n", (unsigned) rxq->port_id,
879 (unsigned) rxq->queue_id);
880 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
885 rxe = &sw_ring[rx_id];
887 if (rx_id == rxq->nb_rx_desc)
890 /* Prefetch next mbuf while processing current one. */
891 rte_igb_prefetch(sw_ring[rx_id].mbuf);
894 * When next RX descriptor is on a cache-line boundary,
895 * prefetch the next 4 RX descriptors and the next 8 pointers
898 if ((rx_id & 0x3) == 0) {
899 rte_igb_prefetch(&rx_ring[rx_id]);
900 rte_igb_prefetch(&sw_ring[rx_id]);
904 * Update RX descriptor with the physical address of the new
905 * data buffer of the new allocated mbuf.
909 dma = rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(nmb));
910 rxdp->read.pkt_addr = dma;
911 rxdp->read.hdr_addr = dma;
914 * Set data length & data buffer address of mbuf.
916 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
917 rxm->pkt.data_len = data_len;
918 rxm->pkt.data = (char*) rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
921 * If this is the first buffer of the received packet,
922 * set the pointer to the first mbuf of the packet and
923 * initialize its context.
924 * Otherwise, update the total length and the number of segments
925 * of the current scattered packet, and update the pointer to
926 * the last mbuf of the current packet.
928 if (first_seg == NULL) {
930 first_seg->pkt.pkt_len = data_len;
931 first_seg->pkt.nb_segs = 1;
933 first_seg->pkt.pkt_len += data_len;
934 first_seg->pkt.nb_segs++;
935 last_seg->pkt.next = rxm;
939 * If this is not the last buffer of the received packet,
940 * update the pointer to the last mbuf of the current scattered
941 * packet and continue to parse the RX ring.
943 if (! (staterr & E1000_RXD_STAT_EOP)) {
949 * This is the last buffer of the received packet.
950 * If the CRC is not stripped by the hardware:
951 * - Subtract the CRC length from the total packet length.
952 * - If the last buffer only contains the whole CRC or a part
953 * of it, free the mbuf associated to the last buffer.
954 * If part of the CRC is also contained in the previous
955 * mbuf, subtract the length of that CRC part from the
956 * data length of the previous mbuf.
958 rxm->pkt.next = NULL;
959 if (unlikely(rxq->crc_len > 0)) {
960 first_seg->pkt.pkt_len -= ETHER_CRC_LEN;
961 if (data_len <= ETHER_CRC_LEN) {
962 rte_pktmbuf_free_seg(rxm);
963 first_seg->pkt.nb_segs--;
964 last_seg->pkt.data_len = (uint16_t)
965 (last_seg->pkt.data_len -
966 (ETHER_CRC_LEN - data_len));
967 last_seg->pkt.next = NULL;
970 (uint16_t) (data_len - ETHER_CRC_LEN);
974 * Initialize the first mbuf of the returned packet:
975 * - RX port identifier,
976 * - hardware offload data, if any:
978 * - IP checksum flag,
979 * - VLAN TCI, if any,
982 first_seg->pkt.in_port = rxq->port_id;
983 first_seg->pkt.hash.rss = rxd.wb.lower.hi_dword.rss;
986 * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
987 * set in the pkt_flags field.
989 first_seg->pkt.vlan_macip.f.vlan_tci =
990 rte_le_to_cpu_16(rxd.wb.upper.vlan);
991 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
992 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
993 pkt_flags = (uint16_t)(pkt_flags |
994 rx_desc_status_to_pkt_flags(staterr));
995 pkt_flags = (uint16_t)(pkt_flags |
996 rx_desc_error_to_pkt_flags(staterr));
997 first_seg->ol_flags = pkt_flags;
999 /* Prefetch data of first segment, if configured to do so. */
1000 rte_packet_prefetch(first_seg->pkt.data);
1003 * Store the mbuf address into the next entry of the array
1004 * of returned packets.
1006 rx_pkts[nb_rx++] = first_seg;
1009 * Setup receipt context for a new packet.
1015 * Record index of the next RX descriptor to probe.
1017 rxq->rx_tail = rx_id;
1020 * Save receive context.
1022 rxq->pkt_first_seg = first_seg;
1023 rxq->pkt_last_seg = last_seg;
1026 * If the number of free RX descriptors is greater than the RX free
1027 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1029 * Update the RDT with the value of the last processed RX descriptor
1030 * minus 1, to guarantee that the RDT register is never equal to the
1031 * RDH register, which creates a "full" ring situation from the
1032 * hardware point of view...
1034 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1035 if (nb_hold > rxq->rx_free_thresh) {
1036 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1037 "nb_hold=%u nb_rx=%u\n",
1038 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1039 (unsigned) rx_id, (unsigned) nb_hold,
1041 rx_id = (uint16_t) ((rx_id == 0) ?
1042 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1043 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1046 rxq->nb_rx_hold = nb_hold;
1051 * Rings setup and release.
1053 * TDBA/RDBA should be aligned on a 16-byte boundary, but TDLEN/RDLEN must be a
1054 * multiple of 128 bytes, so we align TDBA/RDBA on a 128-byte boundary.
1055 * This also optimizes the cache line size effect:
1056 * the hardware supports cache line sizes up to 128 bytes.
1058 #define IGB_ALIGN 128
1061 * Maximum number of Ring Descriptors.
1063 * Since RDLEN/TDLEN must be a multiple of 128 bytes, the number of ring
1064 * descriptors must meet the following condition:
1065 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1067 #define IGB_MIN_RING_DESC 32
1068 #define IGB_MAX_RING_DESC 4096
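/*
 * Worked example (assuming the 16-byte advanced descriptor format): the
 * "multiple of 128 bytes" rule means nb_desc must be a multiple of 8 within
 * [IGB_MIN_RING_DESC, IGB_MAX_RING_DESC]. For instance, 512 descriptors
 * occupy 512 * 16 = 8192 bytes and 8192 % 128 == 0, so 512 is accepted.
 */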
1070 static const struct rte_memzone *
1071 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
1072 uint16_t queue_id, uint32_t ring_size, int socket_id)
1074 char z_name[RTE_MEMZONE_NAMESIZE];
1075 const struct rte_memzone *mz;
1077 rte_snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
1078 dev->driver->pci_drv.name, ring_name,
1079 dev->data->port_id, queue_id);
1080 mz = rte_memzone_lookup(z_name);
1084 return rte_memzone_reserve_aligned(z_name, ring_size,
1085 socket_id, 0, IGB_ALIGN);
1089 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1093 if (txq->sw_ring != NULL) {
1094 for (i = 0; i < txq->nb_tx_desc; i++) {
1095 if (txq->sw_ring[i].mbuf != NULL) {
1096 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1097 txq->sw_ring[i].mbuf = NULL;
1104 igb_tx_queue_release(struct igb_tx_queue *txq)
1107 igb_tx_queue_release_mbufs(txq);
1108 rte_free(txq->sw_ring);
1114 eth_igb_tx_queue_release(void *txq)
1116 igb_tx_queue_release(txq);
1120 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1125 memset((void*)&txq->ctx_cache, 0,
1126 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1130 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1132 struct igb_tx_entry *txe = txq->sw_ring;
1135 struct e1000_hw *hw;
1137 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1138 size = sizeof(union e1000_adv_tx_desc) * txq->nb_tx_desc;
1139 /* Zero out HW ring memory */
1140 for (i = 0; i < size; i++) {
1141 ((volatile char *)txq->tx_ring)[i] = 0;
1144 /* Initialize ring entries */
1145 prev = (uint16_t)(txq->nb_tx_desc - 1);
1146 for (i = 0; i < txq->nb_tx_desc; i++) {
1147 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1149 txd->wb.status = E1000_TXD_STAT_DD;
1152 txe[prev].next_id = i;
1156 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1157 /* 82575 specific, each tx queue will use 2 hw contexts */
1158 if (hw->mac.type == e1000_82575)
1159 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1161 igb_reset_tx_queue_stat(txq);
1165 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1168 unsigned int socket_id,
1169 const struct rte_eth_txconf *tx_conf)
1171 const struct rte_memzone *tz;
1172 struct igb_tx_queue *txq;
1173 struct e1000_hw *hw;
1176 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1179 * Validate number of transmit descriptors.
1180 * It must not exceed hardware maximum, and must be multiple
1183 if (((nb_desc * sizeof(union e1000_adv_tx_desc)) % IGB_ALIGN) != 0 ||
1184 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1189 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1192 if (tx_conf->tx_free_thresh != 0)
1193 RTE_LOG(WARNING, PMD,
1194 "The tx_free_thresh parameter is not "
1195 "used for the 1G driver.\n");
1196 if (tx_conf->tx_rs_thresh != 0)
1197 RTE_LOG(WARNING, PMD,
1198 "The tx_rs_thresh parameter is not "
1199 "used for the 1G driver.\n");
1200 if (tx_conf->tx_thresh.wthresh == 0)
1201 RTE_LOG(WARNING, PMD,
1202 "To improve 1G driver performance, consider setting "
1203 "the TX WTHRESH value to 4, 8, or 16.\n");
1205 /* Free memory prior to re-allocation if needed */
1206 if (dev->data->tx_queues[queue_idx] != NULL)
1207 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1209 /* First allocate the tx queue data structure */
1210 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1216 * Allocate TX ring hardware descriptors. A memzone large enough to
1217 * handle the maximum ring size is allocated in order to allow for
1218 * resizing in later calls to the queue setup function.
1220 size = sizeof(union e1000_adv_tx_desc) * IGB_MAX_RING_DESC;
1221 tz = ring_dma_zone_reserve(dev, "tx_ring", queue_idx,
1224 igb_tx_queue_release(txq);
1228 txq->nb_tx_desc = nb_desc;
1229 txq->pthresh = tx_conf->tx_thresh.pthresh;
1230 txq->hthresh = tx_conf->tx_thresh.hthresh;
1231 txq->wthresh = tx_conf->tx_thresh.wthresh;
1232 txq->queue_id = queue_idx;
1233 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1234 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1235 txq->port_id = dev->data->port_id;
1237 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1238 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
1239 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1241 /* Allocate software ring */
1242 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1243 sizeof(struct igb_tx_entry) * nb_desc,
1245 if (txq->sw_ring == NULL) {
1246 igb_tx_queue_release(txq);
1249 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1250 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1252 igb_reset_tx_queue(txq, dev);
1253 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1254 dev->data->tx_queues[queue_idx] = txq;
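/*
 * Illustrative sketch (application side, assumed values): set up one TX queue
 * with thresholds matching the advice logged above (non-zero WTHRESH, the
 * unused tx_free_thresh/tx_rs_thresh left at 0).
 */
static int
igb_example_setup_tx_queue(uint8_t port_id, unsigned int socket_id)
{
	struct rte_eth_txconf txconf = {
		.tx_thresh = {
			.pthresh = 8,	/* assumed prefetch threshold */
			.hthresh = 1,	/* assumed host threshold */
			.wthresh = 16,	/* non-zero, as suggested above */
		},
	};

	/* 512 descriptors: a multiple of 8 within [32, 4096] */
	return rte_eth_tx_queue_setup(port_id, 0, 512, socket_id, &txconf);
}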
1260 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1264 if (rxq->sw_ring != NULL) {
1265 for (i = 0; i < rxq->nb_rx_desc; i++) {
1266 if (rxq->sw_ring[i].mbuf != NULL) {
1267 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1268 rxq->sw_ring[i].mbuf = NULL;
1275 igb_rx_queue_release(struct igb_rx_queue *rxq)
1278 igb_rx_queue_release_mbufs(rxq);
1279 rte_free(rxq->sw_ring);
1285 eth_igb_rx_queue_release(void *rxq)
1287 igb_rx_queue_release(rxq);
1291 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1296 /* Zero out HW ring memory */
1297 size = sizeof(union e1000_adv_rx_desc) * rxq->nb_rx_desc;
1298 for (i = 0; i < size; i++) {
1299 ((volatile char *)rxq->rx_ring)[i] = 0;
1303 rxq->pkt_first_seg = NULL;
1304 rxq->pkt_last_seg = NULL;
1308 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1311 unsigned int socket_id,
1312 const struct rte_eth_rxconf *rx_conf,
1313 struct rte_mempool *mp)
1315 const struct rte_memzone *rz;
1316 struct igb_rx_queue *rxq;
1317 struct e1000_hw *hw;
1320 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1323 * Validate number of receive descriptors.
1324 * It must not exceed hardware maximum, and must be multiple
1327 if (((nb_desc * sizeof(union e1000_adv_rx_desc)) % IGB_ALIGN) != 0 ||
1328 (nb_desc > IGB_MAX_RING_DESC) || (nb_desc < IGB_MIN_RING_DESC)) {
1332 /* Free memory prior to re-allocation if needed */
1333 if (dev->data->rx_queues[queue_idx] != NULL) {
1334 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1335 dev->data->rx_queues[queue_idx] = NULL;
1338 /* First allocate the RX queue data structure. */
1339 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1344 rxq->nb_rx_desc = nb_desc;
1345 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1346 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1347 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1348 rxq->drop_en = rx_conf->rx_drop_en;
1349 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1350 rxq->queue_id = queue_idx;
1351 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1352 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1353 rxq->port_id = dev->data->port_id;
1354 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1358 * Allocate RX ring hardware descriptors. A memzone large enough to
1359 * handle the maximum ring size is allocated in order to allow for
1360 * resizing in later calls to the queue setup function.
1362 size = sizeof(union e1000_adv_rx_desc) * IGB_MAX_RING_DESC;
1363 rz = ring_dma_zone_reserve(dev, "rx_ring", queue_idx, size, socket_id);
1365 igb_rx_queue_release(rxq);
1368 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1369 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1370 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
1371 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1373 /* Allocate software ring. */
1374 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1375 sizeof(struct igb_rx_entry) * nb_desc,
1377 if (rxq->sw_ring == NULL) {
1378 igb_rx_queue_release(rxq);
1381 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
1382 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1384 dev->data->rx_queues[queue_idx] = rxq;
1385 igb_reset_rx_queue(rxq);
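/*
 * Illustrative sketch (application side, assumed values): set up one RX queue
 * backed by an existing mbuf pool; the pool pointer and the descriptor count
 * are placeholders chosen to satisfy the checks performed above.
 */
static int
igb_example_setup_rx_queue(uint8_t port_id, unsigned int socket_id,
		struct rte_mempool *mb_pool)
{
	struct rte_eth_rxconf rxconf = {
		.rx_free_thresh = 32,	/* assumed RDT update threshold */
		.rx_drop_en = 0,	/* do not set SRRCTL.Drop_En */
	};

	/* 512 descriptors: a multiple of 8 within [32, 4096] */
	return rte_eth_rx_queue_setup(port_id, 0, 512, socket_id,
			&rxconf, mb_pool);
}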
1391 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1393 #define IGB_RXQ_SCAN_INTERVAL 4
1394 volatile union e1000_adv_rx_desc *rxdp;
1395 struct igb_rx_queue *rxq;
1398 if (rx_queue_id >= dev->data->nb_rx_queues) {
1399 PMD_RX_LOG(ERR, "Invalid RX queue id=%d\n", rx_queue_id);
1403 rxq = dev->data->rx_queues[rx_queue_id];
1404 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1406 while ((desc < rxq->nb_rx_desc) &&
1407 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1408 desc += IGB_RXQ_SCAN_INTERVAL;
1409 rxdp += IGB_RXQ_SCAN_INTERVAL;
1410 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1411 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1412 desc - rxq->nb_rx_desc]);
1419 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1421 volatile union e1000_adv_rx_desc *rxdp;
1422 struct igb_rx_queue *rxq = rx_queue;
1425 if (unlikely(offset >= rxq->nb_rx_desc))
1427 desc = rxq->rx_tail + offset;
1428 if (desc >= rxq->nb_rx_desc)
1429 desc -= rxq->nb_rx_desc;
1431 rxdp = &rxq->rx_ring[desc];
1432 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1436 igb_dev_clear_queues(struct rte_eth_dev *dev)
1439 struct igb_tx_queue *txq;
1440 struct igb_rx_queue *rxq;
1442 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1443 txq = dev->data->tx_queues[i];
1445 igb_tx_queue_release_mbufs(txq);
1446 igb_reset_tx_queue(txq, dev);
1450 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1451 rxq = dev->data->rx_queues[i];
1453 igb_rx_queue_release_mbufs(rxq);
1454 igb_reset_rx_queue(rxq);
1460 * Receive Side Scaling (RSS).
1461 * See section 7.1.1.7 in the following document:
1462 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1465 * The source and destination IP addresses of the IP header and the source and
1466 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1467 * against a configurable random key to compute a 32-bit RSS hash result.
1468 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1469 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1470 * RSS output index, which is used as the RX queue index in which to store the received packet.
1472 * The following output is supplied in the RX write-back descriptor:
1473 * - 32-bit result of the Microsoft RSS hash function,
1474 * - 4-bit RSS type field.
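/*
 * Worked example (assumed hash value): with a 32-bit RSS hash of 0x1234abcd,
 * the seven LSBs are 0x4d (77), so RETA entry 77 selects the destination RX
 * queue for that packet and 0x1234abcd is reported in the mbuf RSS hash field.
 */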
1478 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1479 * Used as the default key.
1481 static uint8_t rss_intel_key[40] = {
1482 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1483 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1484 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1485 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1486 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1490 igb_rss_disable(struct rte_eth_dev *dev)
1492 struct e1000_hw *hw;
1495 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1496 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1497 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1498 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1502 igb_rss_configure(struct rte_eth_dev *dev)
1504 struct e1000_hw *hw;
1512 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1514 rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1515 if (rss_hf == 0) /* Disable RSS. */ {
1516 igb_rss_disable(dev);
1519 hash_key = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1520 if (hash_key == NULL)
1521 hash_key = rss_intel_key; /* Default hash key. */
1523 /* Fill in RSS hash key. */
1524 for (i = 0; i < 10; i++) {
1525 rss_key = hash_key[(i * 4)];
1526 rss_key |= hash_key[(i * 4) + 1] << 8;
1527 rss_key |= hash_key[(i * 4) + 2] << 16;
1528 rss_key |= hash_key[(i * 4) + 3] << 24;
1529 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1532 /* Fill in redirection table. */
1533 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
1534 for (i = 0; i < 128; i++) {
1541 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
1542 i % dev->data->nb_rx_queues : 0);
1543 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
1545 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
1548 /* Set configured hashing functions in MRQC register. */
1549 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1550 if (rss_hf & ETH_RSS_IPV4)
1551 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1552 if (rss_hf & ETH_RSS_IPV4_TCP)
1553 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1554 if (rss_hf & ETH_RSS_IPV6)
1555 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1556 if (rss_hf & ETH_RSS_IPV6_EX)
1557 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1558 if (rss_hf & ETH_RSS_IPV6_TCP)
1559 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1560 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1561 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1562 if (rss_hf & ETH_RSS_IPV4_UDP)
1563 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1564 if (rss_hf & ETH_RSS_IPV6_UDP)
1565 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1566 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1567 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1568 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
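/*
 * Illustrative sketch (application side, assumed values): the rss_hf and
 * rss_key fields consumed by igb_rss_configure() above come from the device
 * configuration passed to rte_eth_dev_configure(). Leaving rss_key NULL
 * selects the default Intel key defined earlier in this file.
 */
static int
igb_example_enable_rss(uint8_t port_id, uint16_t nb_rx_queues)
{
	struct rte_eth_conf conf = {
		.rx_adv_conf.rss_conf = {
			.rss_key = NULL,	/* use rss_intel_key */
			.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP |
				  ETH_RSS_IPV6 | ETH_RSS_IPV6_TCP,
		},
	};

	/* one TX queue assumed; RSS spreads packets over the RX queues */
	return rte_eth_dev_configure(port_id, nb_rx_queues, 1, &conf);
}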
1571 /*********************************************************************
1573 * Enable receive unit.
1575 **********************************************************************/
1578 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
1580 struct igb_rx_entry *rxe = rxq->sw_ring;
1584 /* Initialize software ring entries. */
1585 for (i = 0; i < rxq->nb_rx_desc; i++) {
1586 volatile union e1000_adv_rx_desc *rxd;
1587 struct rte_mbuf *mbuf = rte_rxmbuf_alloc(rxq->mb_pool);
1590 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
1591 "queue_id=%hu\n", rxq->queue_id);
1592 igb_rx_queue_release(rxq);
1596 rte_cpu_to_le_64(RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf));
1597 rxd = &rxq->rx_ring[i];
1598 rxd->read.hdr_addr = dma_addr;
1599 rxd->read.pkt_addr = dma_addr;
1606 #define E1000_MRQC_DEF_Q_SHIFT (3)
1608 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
1610 struct e1000_hw *hw =
1611 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1614 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
1616 * SRIOV active scheme
1617 * FIXME if support RSS together with VMDq & SRIOV
1619 mrqc = E1000_MRQC_ENABLE_VMDQ;
1620 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
1621 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
1622 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1623 } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
1625 * SRIOV inactive scheme
1627 if (dev->data->nb_rx_queues > 1)
1628 igb_rss_configure(dev);
1630 igb_rss_disable(dev);
1637 eth_igb_rx_init(struct rte_eth_dev *dev)
1639 struct e1000_hw *hw;
1640 struct igb_rx_queue *rxq;
1641 struct rte_pktmbuf_pool_private *mbp_priv;
1646 uint16_t rctl_bsize;
1650 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1654 * Make sure receives are disabled while setting
1655 * up the descriptor ring.
1657 rctl = E1000_READ_REG(hw, E1000_RCTL);
1658 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
1661 * Configure support of jumbo frames, if any.
1663 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
1664 rctl |= E1000_RCTL_LPE;
1667 * Set the maximum packet length by default; it might be updated
1668 * later when dual VLAN is enabled or disabled.
1670 E1000_WRITE_REG(hw, E1000_RLPML,
1671 dev->data->dev_conf.rxmode.max_rx_pkt_len +
1674 rctl &= ~E1000_RCTL_LPE;
1676 /* Configure and enable each RX queue. */
1678 dev->rx_pkt_burst = eth_igb_recv_pkts;
1679 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1683 rxq = dev->data->rx_queues[i];
1685 /* Allocate buffers for descriptor rings and set up queue */
1686 ret = igb_alloc_rx_queue_mbufs(rxq);
1691 * Reset crc_len in case it was changed after queue setup by a
1695 (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
1698 bus_addr = rxq->rx_ring_phys_addr;
1699 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
1701 sizeof(union e1000_adv_rx_desc));
1702 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
1703 (uint32_t)(bus_addr >> 32));
1704 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
1706 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1709 * Configure RX buffer size.
1711 mbp_priv = (struct rte_pktmbuf_pool_private *)
1712 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1713 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1714 RTE_PKTMBUF_HEADROOM);
1715 if (buf_size >= 1024) {
1717 * Configure the BSIZEPACKET field of the SRRCTL
1718 * register of the queue.
1719 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1720 * If this field is equal to 0b, then RCTL.BSIZE
1721 * determines the RX packet buffer size.
1723 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1724 E1000_SRRCTL_BSIZEPKT_MASK);
1725 buf_size = (uint16_t) ((srrctl &
1726 E1000_SRRCTL_BSIZEPKT_MASK) <<
1727 E1000_SRRCTL_BSIZEPKT_SHIFT);
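/*
 * Worked example (assumed 2 KB mbuf data room and a 128-byte
 * RTE_PKTMBUF_HEADROOM): buf_size starts at 1920, BSIZEPACKET is programmed
 * to 1 (1 KB granularity) and buf_size is rounded down to 1024 before the
 * scatter check below.
 */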
1729 /* Add the dual VLAN tag length to account for dual VLAN support. */
1730 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
1731 2 * VLAN_TAG_SIZE) > buf_size){
1732 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1733 dev->data->scattered_rx = 1;
1737 * Use BSIZE field of the device RCTL register.
1739 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1740 rctl_bsize = buf_size;
1741 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1742 dev->data->scattered_rx = 1;
1745 /* Set if packets are dropped when no descriptors available */
1747 srrctl |= E1000_SRRCTL_DROP_EN;
1749 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
1751 /* Enable this RX queue. */
1752 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
1753 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
1754 rxdctl &= 0xFFF00000;
1755 rxdctl |= (rxq->pthresh & 0x1F);
1756 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
1757 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
1758 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
1762 * Setup BSIZE field of RCTL register, if needed.
1763 * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
1764 * register, since the code above configures the SRRCTL register of
1765 * the RX queue in such a case.
1766 * All configurable sizes are:
1767 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
1768 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
1769 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
1770 * 2048: rctl |= E1000_RCTL_SZ_2048;
1771 * 1024: rctl |= E1000_RCTL_SZ_1024;
1772 * 512: rctl |= E1000_RCTL_SZ_512;
1773 * 256: rctl |= E1000_RCTL_SZ_256;
1775 if (rctl_bsize > 0) {
1776 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1777 rctl |= E1000_RCTL_SZ_512;
1778 else /* 256 <= buf_size < 512 - use 256 */
1779 rctl |= E1000_RCTL_SZ_256;
1783 * Configure RSS if device configured with multiple RX queues.
1785 igb_dev_mq_rx_configure(dev);
1788 * Setup the Checksum Register.
1789 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1791 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
1792 rxcsum |= E1000_RXCSUM_PCSD;
1794 /* Enable both L3/L4 rx checksum offload */
1795 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
1796 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1798 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
1799 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
1801 /* Setup the Receive Control Register. */
1802 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1803 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
1805 /* set STRCRC bit in all queues for Powerville/Springville */
1806 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i210) {
1807 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1808 rxq = dev->data->rx_queues[i];
1809 uint32_t dvmolr = E1000_READ_REG(hw,
1810 E1000_DVMOLR(rxq->reg_idx));
1811 dvmolr |= E1000_DVMOLR_STRCRC;
1812 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
1816 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1818 /* clear STRCRC bit in all queues for Powerville/Springville */
1819 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i210) {
1820 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1821 rxq = dev->data->rx_queues[i];
1822 uint32_t dvmolr = E1000_READ_REG(hw,
1823 E1000_DVMOLR(rxq->reg_idx));
1824 dvmolr &= ~E1000_DVMOLR_STRCRC;
1825 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
1830 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
1831 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
1832 E1000_RCTL_RDMTS_HALF |
1833 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
1835 /* Make sure VLAN Filters are off. */
1836 rctl &= ~E1000_RCTL_VFE;
1837 /* Don't store bad packets. */
1838 rctl &= ~E1000_RCTL_SBP;
1840 /* Enable Receives. */
1841 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
1844 * Setup the HW Rx Head and Tail Descriptor Pointers.
1845 * This needs to be done after enable.
1847 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1848 rxq = dev->data->rx_queues[i];
1849 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
1850 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
1856 /*********************************************************************
1858 * Enable transmit unit.
1860 **********************************************************************/
1862 eth_igb_tx_init(struct rte_eth_dev *dev)
1864 struct e1000_hw *hw;
1865 struct igb_tx_queue *txq;
1870 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1872 /* Setup the Base and Length of the Tx Descriptor Rings. */
1873 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1875 txq = dev->data->tx_queues[i];
1876 bus_addr = txq->tx_ring_phys_addr;
1878 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
1880 sizeof(union e1000_adv_tx_desc));
1881 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
1882 (uint32_t)(bus_addr >> 32));
1883 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
1885 /* Setup the HW Tx Head and Tail descriptor pointers. */
1886 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
1887 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
1889 /* Setup Transmit threshold registers. */
1890 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
1891 txdctl |= txq->pthresh & 0x1F;
1892 txdctl |= ((txq->hthresh & 0x1F) << 8);
1893 txdctl |= ((txq->wthresh & 0x1F) << 16);
1894 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
1895 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
1898 /* Program the Transmit Control Register. */
1899 tctl = E1000_READ_REG(hw, E1000_TCTL);
1900 tctl &= ~E1000_TCTL_CT;
1901 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
1902 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
1904 e1000_config_collision_dist(hw);
1906 /* This write will effectively turn on the transmit unit. */
1907 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
1910 /*********************************************************************
1912 * Enable VF receive unit.
1914 **********************************************************************/
1916 eth_igbvf_rx_init(struct rte_eth_dev *dev)
1918 struct e1000_hw *hw;
1919 struct igb_rx_queue *rxq;
1920 struct rte_pktmbuf_pool_private *mbp_priv;
1923 uint16_t rctl_bsize;
1927 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1929 /* Configure and enable each RX queue. */
1931 dev->rx_pkt_burst = eth_igb_recv_pkts;
1932 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1936 rxq = dev->data->rx_queues[i];
1938 /* Allocate buffers for descriptor rings and set up queue */
1939 ret = igb_alloc_rx_queue_mbufs(rxq);
1943 bus_addr = rxq->rx_ring_phys_addr;
1944 E1000_WRITE_REG(hw, E1000_RDLEN(i),
1946 sizeof(union e1000_adv_rx_desc));
1947 E1000_WRITE_REG(hw, E1000_RDBAH(i),
1948 (uint32_t)(bus_addr >> 32));
1949 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
1951 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1954 * Configure RX buffer size.
1956 mbp_priv = (struct rte_pktmbuf_pool_private *)
1957 ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
1958 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
1959 RTE_PKTMBUF_HEADROOM);
1960 if (buf_size >= 1024) {
1962 * Configure the BSIZEPACKET field of the SRRCTL
1963 * register of the queue.
1964 * Value is in 1 KB resolution, from 1 KB to 127 KB.
1965 * If this field is equal to 0b, then RCTL.BSIZE
1966 * determines the RX packet buffer size.
1968 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
1969 E1000_SRRCTL_BSIZEPKT_MASK);
1970 buf_size = (uint16_t) ((srrctl &
1971 E1000_SRRCTL_BSIZEPKT_MASK) <<
1972 E1000_SRRCTL_BSIZEPKT_SHIFT);
1974 /* Add the dual VLAN tag length to account for dual VLAN support. */
1975 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
1976 2 * VLAN_TAG_SIZE) > buf_size){
1977 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1978 dev->data->scattered_rx = 1;
1982 * Use BSIZE field of the device RCTL register.
1984 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
1985 rctl_bsize = buf_size;
1986 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
1987 dev->data->scattered_rx = 1;
1990 /* Set if packets are dropped when no descriptors available */
1992 srrctl |= E1000_SRRCTL_DROP_EN;
1994 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
1996 /* Enable this RX queue. */
1997 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
1998 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
1999 rxdctl &= 0xFFF00000;
2000 rxdctl |= (rxq->pthresh & 0x1F);
2001 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2002 if (hw->mac.type == e1000_82576) {
2004 * Workaround for an 82576 VF erratum:
2005 * force WTHRESH to 1
2006 * to avoid write-back sometimes not being triggered.
2009 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !\n");
2012 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2013 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2017 * Setup the HW Rx Head and Tail Descriptor Pointers.
2018 * This needs to be done after enable.
2020 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2021 rxq = dev->data->rx_queues[i];
2022 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2023 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2029 /*********************************************************************
2031 * Enable VF transmit unit.
2033 **********************************************************************/
2035 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2037 struct e1000_hw *hw;
2038 struct igb_tx_queue *txq;
2042 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2044 /* Setup the Base and Length of the Tx Descriptor Rings. */
2045 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2048 txq = dev->data->tx_queues[i];
2049 bus_addr = txq->tx_ring_phys_addr;
2050 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2052 sizeof(union e1000_adv_tx_desc));
2053 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2054 (uint32_t)(bus_addr >> 32));
2055 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2057 /* Setup the HW Tx Head and Tail descriptor pointers. */
2058 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2059 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2061 /* Setup Transmit threshold registers. */
2062 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2063 txdctl |= txq->pthresh & 0x1F;
2064 txdctl |= ((txq->hthresh & 0x1F) << 8);
2065 if (hw->mac.type == e1000_82576) {
2067 * Workaround for an 82576 VF erratum:
2068 * force WTHRESH to 1
2069 * to avoid write-back sometimes not being triggered.
2072 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !\n");
2075 txdctl |= ((txq->wthresh & 0x1F) << 16);
2076 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2077 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);